9409bdea81f9ac4824f4d1171a8d1c463fda719b
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query.c
1 /*
2 * Copyright 2011 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christoph Bumiller
23 */
24
25 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
26
27 #include "nvc0/nvc0_context.h"
28 #include "nvc0/nvc0_query.h"
29 #include "nvc0/nvc0_query_sw.h"
30
31 #include "nv_object.xml.h"
32 #include "nvc0/nve4_compute.xml.h"
33 #include "nvc0/nvc0_compute.xml.h"
34
35 #define NVC0_QUERY_STATE_READY 0
36 #define NVC0_QUERY_STATE_ACTIVE 1
37 #define NVC0_QUERY_STATE_ENDED 2
38 #define NVC0_QUERY_STATE_FLUSHED 3
39
40 #define NVC0_QUERY_ALLOC_SPACE 256
41
42 static boolean nvc0_hw_sm_query_begin(struct nvc0_context *,
43 struct nvc0_query *);
44 static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *);
45 static boolean nvc0_hw_sm_query_result(struct nvc0_context *,
46 struct nvc0_query *, void *, boolean);
47
/* (Re)allocate the GART-backed result buffer for a query.
 *
 * Passing size == 0 only releases the current storage. If the query may
 * still be written by the GPU (state != READY), the suballocation is not
 * freed immediately but deferred until the current fence signals.
 *
 * Returns false on allocation or mapping failure (query left without
 * storage in that case).
 */
static bool
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
      if (q->mm) {
         if (q->state == NVC0_QUERY_STATE_READY)
            nouveau_mm_free(q->mm);
         else
            /* GPU may still write here: free only after the fence passes */
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, q->mm);
      }
   }
   if (size) {
      q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return false;
      q->offset = q->base;

      /* map persistently so results can be read back directly */
      ret = nouveau_bo_map(q->bo, 0, screen->base.client);
      if (ret) {
         nvc0_query_allocate(nvc0, q, 0);
         return false;
      }
      q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
   }
   return true;
}
79
80 static void
81 nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
82 {
83 nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
84 nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
85 FREE(nvc0_query(pq));
86 }
87
/* pipe_context::create_query.
 *
 * 'index' selects the vertex stream (or TFB buffer) for indexed query
 * types. Software driver-statistics queries are delegated to
 * nvc0_sw_create_query; all other types get GART result storage sized
 * for the per-type result layout.
 */
static struct pipe_query *
nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q;
   unsigned space = NVC0_QUERY_ALLOC_SPACE;

   q = nvc0_sw_create_query(nvc0, type, index);
   if (q)
      return (struct pipe_query *)q;

   q = CALLOC_STRUCT(nvc0_query);
   if (!q)
      return NULL;

   switch (type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      /* occlusion queries rotate through 32-byte slots, see query_begin */
      q->rotate = 32;
      space = NVC0_QUERY_ALLOC_SPACE;
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* 10 counters, begin and end values, 64 bits each (+ padding) */
      q->is64bit = true;
      space = 512;
      break;
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->is64bit = true;
      space = 64;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      q->is64bit = true;
      q->index = index;
      space = 32;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
      space = 32;
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      space = 16;
      break;
   default:
      /* hardware SM (MP performance counter) queries need a recent kernel */
      if (nvc0->screen->base.device->drm_version >= 0x01000101) {
         if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) {
            /* for each MP:
             * [00] = WS0.C0
             * [04] = WS0.C1
             * [08] = WS0.C2
             * [0c] = WS0.C3
             * [10] = WS1.C0
             * [14] = WS1.C1
             * [18] = WS1.C2
             * [1c] = WS1.C3
             * [20] = WS2.C0
             * [24] = WS2.C1
             * [28] = WS2.C2
             * [2c] = WS2.C3
             * [30] = WS3.C0
             * [34] = WS3.C1
             * [38] = WS3.C2
             * [3c] = WS3.C3
             * [40] = MP.C4
             * [44] = MP.C5
             * [48] = MP.C6
             * [4c] = MP.C7
             * [50] = WS0.sequence
             * [54] = WS1.sequence
             * [58] = WS2.sequence
             * [5c] = WS3.sequence
             */
            space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
            break;
         } else
         if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) {
            /* for each MP:
             * [00] = MP.C0
             * [04] = MP.C1
             * [08] = MP.C2
             * [0c] = MP.C3
             * [10] = MP.C4
             * [14] = MP.C5
             * [18] = MP.C6
             * [1c] = MP.C7
             * [20] = MP.sequence
             */
            space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
            break;
         }
      }
      debug_printf("invalid query type: %u\n", type);
      FREE(q);
      return NULL;
   }
   if (!nvc0_query_allocate(nvc0, q, space)) {
      FREE(q);
      return NULL;
   }

   q->type = type;

   if (q->rotate) {
      /* we advance before query_begin ! */
      q->offset -= q->rotate;
      q->data -= q->rotate / sizeof(*q->data);
   } else
   if (!q->is64bit)
      q->data[0] = 0; /* initialize sequence */

   return (struct pipe_query *)q;
}
202
203 static void
204 nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
205 unsigned offset, uint32_t get)
206 {
207 offset += q->offset;
208
209 PUSH_SPACE(push, 5);
210 PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
211 BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
212 PUSH_DATAh(push, q->bo->offset + offset);
213 PUSH_DATA (push, q->bo->offset + offset);
214 PUSH_DATA (push, q->sequence);
215 PUSH_DATA (push, get);
216 }
217
218 static void
219 nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
220 {
221 q->offset += q->rotate;
222 q->data += q->rotate / sizeof(*q->data);
223 if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
224 nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
225 }
226
/* pipe_context::begin_query — emit the 'begin' counter snapshots.
 * The 0xXXXXXXXX values passed to nvc0_query_get() encode the hardware
 * report type/unit; begin values are written at non-zero offsets so the
 * matching writes in nvc0_query_end() (at offset 0) can be subtracted.
 */
static boolean
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);
   bool ret = true;

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we re-
    * initialized it to true.
    */
   if (q->rotate) {
      nvc0_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      q->data[0] = q->sequence; /* initialize sequence */
      q->data[1] = 1; /* initial render condition = true */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   q->sequence++;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      /* only the outermost query resets and enables the sample counter */
      q->nesting = nvc0->screen->num_occlusion_queries_active++;
      if (q->nesting) {
         nvc0_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 3);
         BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_SW_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_SW_QUERY_DRV_STAT_LAST) {
         return q->funcs->begin_query(nvc0, q);
      } else
#endif
      if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
          (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
         ret = nvc0_hw_sm_query_begin(nvc0, q);
      }
      break;
   }
   q->state = NVC0_QUERY_STATE_ACTIVE;
   return ret;
}
309
/* pipe_context::end_query — emit the 'end' counter snapshots at offset 0,
 * mirroring the begin writes from nvc0_query_begin() so results can be
 * computed by subtraction in nvc0_query_result().
 */
static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   if (q->state != NVC0_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      q->sequence++;
   }
   q->state = NVC0_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(push, q, 0, 0x0100f002);
      /* disable sample counting once the last nested query ends */
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      /* TODO: How do we sum over all streams for render condition ? */
      /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
      nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      nvc0_query_get(push, q, 0x20, 0x00005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to false */
      q->state = NVC0_QUERY_STATE_READY;
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_SW_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_SW_QUERY_DRV_STAT_LAST) {
         q->funcs->end_query(nvc0, q);
         return;
      } else
#endif
      if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
          (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
         nvc0_hw_sm_query_end(nvc0, q);
      }
      break;
   }
   /* 64-bit queries have no 32-bit sequence dword to poll; completion is
    * tracked with a fence instead (see nvc0_query_update).
    */
   if (q->is64bit)
      nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}
394
395 static inline void
396 nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
397 {
398 if (q->is64bit) {
399 if (nouveau_fence_signalled(q->fence))
400 q->state = NVC0_QUERY_STATE_READY;
401 } else {
402 if (q->data[0] == q->sequence)
403 q->state = NVC0_QUERY_STATE_READY;
404 }
405 }
406
/* pipe_context::get_query_result.
 *
 * Computes the final value from the begin/end snapshots in the mapped
 * result buffer (end values live at lower offsets than begin values, so
 * results are end - begin). If 'wait' is false and the result is not
 * ready yet, kicks the pushbuf once and returns false.
 */
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   uint8_t *res8 = (uint8_t*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (q->type >= NVC0_SW_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_SW_QUERY_DRV_STAT_LAST) {
      return q->funcs->get_query_result(nvc0, q, wait, result);
   } else
#endif
   if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
       (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
      return nvc0_hw_sm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return false;
      }
      /* blocking path: wait for the GPU to finish writing the buffer */
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return false;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* disjoint is always reported as false, frequency in Hz */
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* 10 counters; begin snapshots start at qword 24 (byte 0xc0) */
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return false;
   }

   return true;
}
493
494 void
495 nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
496 {
497 unsigned offset = q->offset;
498
499 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
500
501 PUSH_SPACE(push, 5);
502 PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
503 BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
504 PUSH_DATAh(push, q->bo->offset + offset);
505 PUSH_DATA (push, q->bo->offset + offset);
506 PUSH_DATA (push, q->sequence);
507 PUSH_DATA (push, (1 << 12) |
508 NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
509 }
510
/* pipe_context::render_condition — program the hardware COND_MODE from a
 * predicate query (or disable conditional rendering when pq == NULL).
 * The COND_MODE comparison operates on the (sequence, value) pair the
 * query writes; see the q->data[] initialization in nvc0_query_begin.
 */
static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q;
   uint32_t cond;
   bool wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NVC0_3D_COND_MODE_ALWAYS;
   }
   else {
      q = nvc0_query(pq);
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NVC0_3D_COND_MODE_EQUAL :
                            NVC0_3D_COND_MODE_NOT_EQUAL;
         wait = true;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
         if (likely(!condition)) {
            if (unlikely(q->nesting))
               cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
                             NVC0_3D_COND_MODE_ALWAYS;
            else
               /* outermost occlusion query: can test the counter directly */
               cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
         } else {
            cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NVC0_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   /* remember the condition state for validation / re-emission elsewhere */
   nvc0->cond_query = pq;
   nvc0->cond_cond = condition;
   nvc0->cond_condmode = cond;
   nvc0->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
      return;
   }

   if (wait)
      nvc0_query_fifo_wait(push, q);

   PUSH_SPACE(push, 7);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);
   /* the 2D engine has its own condition registers, keep them in sync */
   BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
}
579
/* Feed 4 bytes of the query's result buffer directly into the command
 * stream as an IB entry (presumably to source draw parameters from a
 * query result — confirm with callers). NO_PREFETCH is set because the
 * GPU may still be writing the data when the entry is queued.
 */
void
nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
                          struct nvc0_query *q, unsigned result_offset)
{
#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))

   PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
   nouveau_pushbuf_space(push, 0, 0, 1);
   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
                        NVC0_IB_ENTRY_1_NO_PREFETCH);
}
591
592 /* === DRIVER STATISTICS === */
593
594 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
595
/* Human-readable names for the software driver-statistics queries
 * (NVC0_SW_QUERY_DRV_STAT_*); order presumably matches that enum —
 * verify against nvc0_query_sw.h.
 */
static const char *nvc0_sw_query_drv_stat_names[] =
{
   "drv-tex_obj_current_count",
   "drv-tex_obj_current_bytes",
   "drv-buf_obj_current_count",
   "drv-buf_obj_current_bytes_vid",
   "drv-buf_obj_current_bytes_sys",
   "drv-tex_transfers_rd",
   "drv-tex_transfers_wr",
   "drv-tex_copy_count",
   "drv-tex_blit_count",
   "drv-tex_cache_flush_count",
   "drv-buf_transfers_rd",
   "drv-buf_transfers_wr",
   "drv-buf_read_bytes_staging_vid",
   "drv-buf_write_bytes_direct",
   "drv-buf_write_bytes_staging_vid",
   "drv-buf_write_bytes_staging_sys",
   "drv-buf_copy_bytes",
   "drv-buf_non_kernel_fence_sync_count",
   "drv-any_non_kernel_fence_sync_count",
   "drv-query_sync_count",
   "drv-gpu_serialize_count",
   "drv-draw_calls_array",
   "drv-draw_calls_indexed",
   "drv-draw_calls_fallback_count",
   "drv-user_buffer_upload_bytes",
   "drv-constbuf_upload_count",
   "drv-constbuf_upload_bytes",
   "drv-pushbuf_count",
   "drv-resource_validate_count"
};
628
629 #endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
630
631
632 /* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
633
634 /* Code to read out MP counters: They are accessible via mmio, too, but let's
635 * just avoid mapping registers in userspace. We'd have to know which MPs are
636 * enabled/present, too, and that information is not presently exposed.
637 * We could add a kernel interface for it, but reading the counters like this
638 * has the advantage of being async (if get_result isn't called immediately).
639 */
/* Pre-assembled NVE4 (Kepler) compute shader that reads the 8 MP
 * performance counters ($pm0-7) plus per-warp-scheduler state and stores
 * them to the buffer addressed by c0[0x0]/c0[0x4]; c0[0x8] is stored as
 * the sequence value. The disassembly is given in the comment below —
 * keep it in sync with the opcode words.
 */
static const uint64_t nve4_read_hw_sm_counters_code[] =
{
   /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
    * mov b32 $r8 $tidx
    * mov b32 $r12 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * ext u32 $r8 $r12 0x414
    * mov b32 $r11 c0[0x4]
    * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
    * ext u32 $r9 $r12 0x208
    * (not $p0) exit
    * set $p1 0x1 eq u32 $r9 0x0
    * mul $r8 u32 $r8 u32 96
    * mul $r12 u32 $r9 u32 16
    * mul $r13 u32 $r9 u32 4
    * add b32 $r9 $r8 $r13
    * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
    * add b32 $r8 $r8 $r12
    * mov b32 $r12 $r10
    * add b32 $r10 $c $r10 $r8
    * mov b32 $r13 $r11
    * add b32 $r11 $r11 0x0 $c
    * add b32 $r12 $c $r12 $r9
    * st b128 wt g[$r10d] $r0q
    * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
    * mov b32 $r0 c0[0x8]
    * add b32 $r13 $r13 0x0 $c
    * $p1 st b128 wt g[$r12d+0x40] $r4q
    * st b32 wt g[$r12d+0x50] $r0
    * exit */
   0x2202020202020207ULL,
   0x2c00000084021c04ULL,
   0x2c0000000c031c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x22b0420042320207ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x7000c01050c21c03ULL,
   0x280040001002dde4ULL,
   0x204282020042e047ULL,
   0x7000c00820c25c03ULL,
   0x80000000000021e7ULL,
   0x190e0000fc93dc03ULL,
   0x1000000180821c02ULL,
   0x1000000040931c02ULL,
   0x1000000010935c02ULL,
   0x4800000034825c03ULL,
   0x22c042c042c04287ULL,
   0x4800000030821c03ULL,
   0x2800000028031de4ULL,
   0x4801000020a29c03ULL,
   0x280000002c035de4ULL,
   0x0800000000b2dc42ULL,
   0x4801000024c31c03ULL,
   0x9400000000a01fc5ULL,
   0x200002e04202c047ULL,
   0x2800400020001de4ULL,
   0x0800000000d35c42ULL,
   0x9400000100c107c5ULL,
   0x9400000140c01f85ULL,
   0x8000000000001de7ULL
};
719
/* NOTE: intentionally using the same names as NV */
/* NVE4 performance counter query names; order presumably matches the
 * NVE4_HW_SM_QUERY_* enum / nve4_hw_sm_queries table — verify against
 * nvc0_query.h.
 */
static const char *nve4_pm_query_names[] =
{
   /* MP counters */
   "active_cycles",
   "active_warps",
   "atom_count",
   "branch",
   "divergent_branch",
   "gld_request",
   "global_ld_mem_divergence_replays",
   "global_store_transaction",
   "global_st_mem_divergence_replays",
   "gred_count",
   "gst_request",
   "inst_executed",
   "inst_issued",
   "inst_issued1",
   "inst_issued2",
   "l1_global_load_hit",
   "l1_global_load_miss",
   "l1_local_load_hit",
   "l1_local_load_miss",
   "l1_local_store_hit",
   "l1_local_store_miss",
   "l1_shared_load_transactions",
   "l1_shared_store_transactions",
   "local_load",
   "local_load_transactions",
   "local_store",
   "local_store_transactions",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "shared_load",
   "shared_load_replay",
   "shared_store",
   "shared_store_replay",
   "sm_cta_launched",
   "threads_launched",
   "uncached_global_load_transaction",
   "warps_launched",
   /* metrics, i.e. functions of the MP counters */
   "metric-ipc",                   /* inst_executed, clock */
   "metric-ipac",                  /* inst_executed, active_cycles */
   "metric-ipec",                  /* inst_executed, (bool)inst_executed */
   "metric-achieved_occupancy",    /* active_warps, active_cycles */
   "metric-sm_efficiency",         /* active_cycles, clock */
   "metric-inst_replay_overhead"   /* inst_issued, inst_executed */
};
775
776 /* For simplicity, we will allocate as many group slots as we allocate counter
777 * slots. This means that a single counter which wants to source from 2 groups
778 * will have to be declared as using 2 counter slots. This shouldn't really be
779 * a problem because such queries don't make much sense ... (unless someone is
780 * really creative).
781 */
/* Hardware configuration for a single MP performance counter slot. */
struct nvc0_mp_counter_cfg
{
   uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */
   uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
   uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */
   uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
   uint32_t sig_sel : 8; /* signal group */
   uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */
};
791
792 #define NVC0_COUNTER_OPn_SUM 0
793 #define NVC0_COUNTER_OPn_OR 1
794 #define NVC0_COUNTER_OPn_AND 2
795 #define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
796 #define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
797 #define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
798 #define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
799
/* Full configuration of one SM query: up to 4 counter slots combined by
 * 'op' (one of the NVC0_COUNTER_OP* values defined above).
 */
struct nvc0_hw_sm_query_cfg
{
   struct nvc0_mp_counter_cfg ctr[4]; /* counter slots; unused ones zeroed */
   uint8_t num_counters;              /* how many of ctr[] are in use */
   uint8_t op;                        /* NVC0_COUNTER_OP* combiner */
   uint8_t norm[2]; /* normalization num,denom */
};
807
808 #define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
809 #define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
810 #define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
811 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
812 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
813 {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
814 #define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
815 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
816 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
817 {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
818 #define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
819 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
820 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
821 {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
822
/* NOTES:
 * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
 * inst_executed etc.: we only count a single warp scheduler
 * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
 * this is inaccurate !
 */
/* NVE4 SM query configuration table, indexed by NVE4_HW_SM_QUERY_* via
 * the _Q1A/_Q1B/_M2* macros defined above. Columns: func mask, mode,
 * signal group, source selection, normalization num/denom.
 */
static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
{
   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
   _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
   _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
   _Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
   _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
   _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
   _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
   _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
   _Q1B(L1_GLD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
   _Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
   _Q1B(L1_LOCAL_LD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
   _Q1B(L1_LOCAL_LD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
   _Q1B(L1_LOCAL_ST_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
   _Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
   _Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
   _Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
   _Q1A(LOCAL_LD, 0x0001, B6, LDST, 0x00000008, 1, 1),
   _Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
   _Q1A(LOCAL_ST, 0x0001, B6, LDST, 0x0000000c, 1, 1),
   _Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
   _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
   _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
   _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
   _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
   _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
   _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
   _Q1A(SHARED_LD, 0x0001, B6, LDST, 0x00000000, 1, 1),
   _Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
   _Q1A(SHARED_ST, 0x0001, B6, LDST, 0x00000004, 1, 1),
   _Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
   _Q1B(SM_CTA_LAUNCHED, 0x0001, B6, WARP, 0x0000001c, 1, 1),
   _Q1A(THREADS_LAUNCHED, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
   _Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
   _Q1A(WARPS_LAUNCHED, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
   _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
   _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
   _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
   _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
   _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
   _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
};
881
882 #undef _Q1A
883 #undef _Q1B
884 #undef _M2A
885 #undef _M2B
886
887 /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
/* Pre-assembled NVC0 (Fermi) compute shader that reads the 8 MP
 * performance counters ($pm0-7) and stores them, plus the sequence value
 * from c0[0x8], to the buffer addressed by c0[0x0]/c0[0x4]. Disassembly
 * in the comment below — keep it in sync with the opcode words.
 */
static const uint64_t nvc0_read_hw_sm_counters_code[] =
{
   /* mov b32 $r8 $tidx
    * mov b32 $r9 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * mov b32 $r11 c0[0x4]
    * ext u32 $r8 $r9 0x414
    * (not $p0) exit
    * mul $r8 u32 $r8 u32 36
    * add b32 $r10 $c $r10 $r8
    * add b32 $r11 $r11 0x0 $c
    * mov b32 $r8 c0[0x8]
    * st b128 wt g[$r10d+0x00] $r0q
    * st b128 wt g[$r10d+0x10] $r4q
    * st b32 wt g[$r10d+0x20] $r8
    * exit */
   0x2c00000084021c04ULL,
   0x2c0000000c025c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x280040001002dde4ULL,
   0x7000c01050921c03ULL,
   0x80000000000021e7ULL,
   0x1000000090821c02ULL,
   0x4801000020a29c03ULL,
   0x0800000000b2dc42ULL,
   0x2800400020021de4ULL,
   0x9400000000a01fc5ULL,
   0x9400000040a11fc5ULL,
   0x9400000080a21f85ULL,
   0x8000000000001de7ULL
};
937
/* Human-readable names of the NVC0:NVE4 MP performance counters, exposed
 * through nvc0_screen_get_driver_query_info().  The array is indexed by the
 * HW-SM query index (id - NVC0_SW_QUERY_DRV_STAT_COUNT), so the order here
 * must match the NVC0_HW_SM_QUERY_* enumeration used as designated
 * initializers in nvc0_hw_sm_queries[] below. */
static const char *nvc0_pm_query_names[] =
{
   /* MP counters */
   "active_cycles",
   "active_warps",
   "atom_count",
   "branch",
   "divergent_branch",
   "gld_request",
   "gred_count",
   "gst_request",
   "inst_executed",
   "inst_issued1_0",
   "inst_issued1_1",
   "inst_issued2_0",
   "inst_issued2_1",
   "local_load",
   "local_store",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "shared_load",
   "shared_store",
   "threads_launched",
   "thread_inst_executed_0",
   "thread_inst_executed_1",
   "thread_inst_executed_2",
   "thread_inst_executed_3",
   "warps_launched",
};
973
/* Builds one single-counter query config: f = counter function mask,
 * m = MP_PM_OP_MODE_* selector, g = signal group, c = number of sources,
 * s0..s5 = per-source signal selects packed one byte each into src_sel.
 * All entries use a single counter summed over MPs (NVC0_COUNTER_OPn_SUM)
 * with a 1/1 normalization. */
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }

/* Counter configurations for NVC0:NVE4, indexed by NVC0_HW_SM_QUERY_*.
 * The magic group/signal numbers select hardware PM signals; they are not
 * documented publicly — treat them as opaque. */
static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
{
   _Q(ACTIVE_CYCLES,       0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ACTIVE_WARPS,        0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(ATOM_COUNT,          0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(BRANCH,              0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
   _Q(DIVERGENT_BRANCH,    0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
   _Q(GLD_REQUEST,         0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GRED_COUNT,          0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GST_REQUEST,         0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_EXECUTED,       0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_0,      0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_1,      0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_0,      0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_1,      0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LOCAL_LD,            0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LOCAL_ST,            0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_0,      0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_1,      0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_2,      0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_3,      0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_4,      0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_5,      0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_6,      0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_7,      0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(SHARED_LD,           0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(SHARED_ST,           0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(THREADS_LAUNCHED,    0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(TH_INST_EXECUTED_0,  0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_1,  0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_2,  0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_3,  0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(WARPS_LAUNCHED,      0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
};

#undef _Q
1012
1013 static const struct nvc0_hw_sm_query_cfg *
1014 nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
1015 {
1016 struct nvc0_screen *screen = nvc0->screen;
1017
1018 if (screen->base.class_3d >= NVE4_3D_CLASS)
1019 return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
1020 return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)];
1021 }
1022
/* Start a HW SM (multiprocessor performance counter) query.
 *
 * Allocates free counter slots in the two signal domains (4 slots each),
 * records which slots this query owns in q->ctr[], and emits the commands
 * that select the PM signals and reset the counters.  Returns false if not
 * enough slots are free.
 */
boolean
nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   const struct nvc0_hw_sm_query_cfg *cfg;
   unsigned i, c;
   unsigned num_ab[2] = { 0, 0 };

   cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);

   /* check if we have enough free counter slots */
   for (i = 0; i < cfg->num_counters; ++i)
      num_ab[cfg->ctr[i].sig_dom]++;

   /* Each domain (A/B) has 4 hardware counter slots per MP. */
   if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
       screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
      NOUVEAU_ERR("Not enough free MP counter slots !\n");
      return false;
   }

   assert(cfg->num_counters <= 4);
   /* Worst-case space: 4 counters x 8 words each, times 6 sources on
    * pre-NVE4, plus 6 words for the enable/select methods below. */
   PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);

   /* Lazily turn on PM counting via a software method the first time a
    * HW SM query is used (0x06ac/0x1fcb are opaque SW-method values). */
   if (!screen->pm.mp_counters_enabled) {
      screen->pm.mp_counters_enabled = true;
      BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
      PUSH_DATA (push, 0x1fcb);
   }

   /* set sequence field to 0 (used to check if result is available) */
   /* NOTE(review): the stride/offset (i * 10 + 10) does not obviously match
    * the per-MP layout read back in nvc0_hw_sm_query_read_data()
    * (9 words per MP, sequence at word 8) — confirm against the buffer
    * layout before relying on this. */
   for (i = 0; i < screen->mp_count; ++i)
      q->data[i * 10 + 10] = 0;

   for (i = 0; i < cfg->num_counters; ++i) {
      const unsigned d = cfg->ctr[i].sig_dom;

      /* First active counter in this domain: program the domain-enable
       * software method, keeping the other domain's bit if it is in use. */
      if (!screen->pm.num_hw_sm_active[d]) {
         uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
         if (screen->pm.num_hw_sm_active[!d])
            m |= 1 << (7 + (8 * d));
         BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
         PUSH_DATA (push, m);
      }
      screen->pm.num_hw_sm_active[d]++;

      /* Claim the first free slot in this domain's range [d*4, d*4+3]. */
      for (c = d * 4; c < (d * 4 + 4); ++c) {
         if (!screen->pm.mp_counter[c]) {
            q->ctr[i] = c;
            screen->pm.mp_counter[c] = (struct pipe_query *)q;
            break;
         }
      }
      assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */

      /* configure and reset the counter(s) */
      if (is_nve4) {
         if (d == 0)
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
         else
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
         PUSH_DATA (push, cfg->ctr[i].sig_sel);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
         PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
         PUSH_DATA (push, 0);
      } else {
         unsigned s;

         /* Pre-NVE4: one SIGSEL/SRCSEL/OP/SET quartet per source; src_sel
          * packs one byte per source. */
         for (s = 0; s < cfg->ctr[i].num_src; s++) {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
            PUSH_DATA (push, cfg->ctr[i].sig_sel);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
            PUSH_DATA (push, 0);
         }
      }
   }
   return true;
}
1109
1110 static void
1111 nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
1112 {
1113 struct nvc0_screen *screen = nvc0->screen;
1114 struct pipe_context *pipe = &nvc0->base.pipe;
1115 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1116 const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
1117 uint32_t mask;
1118 uint32_t input[3];
1119 const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
1120 const uint grid[3] = { screen->mp_count, 1, 1 };
1121 unsigned c;
1122 const struct nvc0_hw_sm_query_cfg *cfg;
1123
1124 cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
1125
1126 if (unlikely(!screen->pm.prog)) {
1127 struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
1128 prog->type = PIPE_SHADER_COMPUTE;
1129 prog->translated = true;
1130 prog->num_gprs = 14;
1131 prog->parm_size = 12;
1132 if (is_nve4) {
1133 prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
1134 prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
1135 } else {
1136 prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
1137 prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
1138 }
1139 screen->pm.prog = prog;
1140 }
1141
1142 /* disable all counting */
1143 PUSH_SPACE(push, 8);
1144 for (c = 0; c < 8; ++c)
1145 if (screen->pm.mp_counter[c]) {
1146 if (is_nve4) {
1147 IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
1148 } else {
1149 IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
1150 }
1151 }
1152 /* release counters for this query */
1153 for (c = 0; c < 8; ++c) {
1154 if (nvc0_query(screen->pm.mp_counter[c]) == q) {
1155 screen->pm.num_hw_sm_active[c / 4]--;
1156 screen->pm.mp_counter[c] = NULL;
1157 }
1158 }
1159
1160 BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
1161 q->bo);
1162
1163 PUSH_SPACE(push, 1);
1164 IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
1165
1166 pipe->bind_compute_state(pipe, screen->pm.prog);
1167 input[0] = (q->bo->offset + q->base);
1168 input[1] = (q->bo->offset + q->base) >> 32;
1169 input[2] = q->sequence;
1170 pipe->launch_grid(pipe, block, grid, 0, input);
1171
1172 nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
1173
1174 /* re-activate other counters */
1175 PUSH_SPACE(push, 16);
1176 mask = 0;
1177 for (c = 0; c < 8; ++c) {
1178 unsigned i;
1179 q = nvc0_query(screen->pm.mp_counter[c]);
1180 if (!q)
1181 continue;
1182 cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
1183 for (i = 0; i < cfg->num_counters; ++i) {
1184 if (mask & (1 << q->ctr[i]))
1185 break;
1186 mask |= 1 << q->ctr[i];
1187 if (is_nve4) {
1188 BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
1189 } else {
1190 BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
1191 }
1192 PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
1193 }
1194 }
1195 }
1196
1197 static inline bool
1198 nvc0_hw_sm_query_read_data(uint32_t count[32][4],
1199 struct nvc0_context *nvc0, bool wait,
1200 struct nvc0_query *q,
1201 const struct nvc0_hw_sm_query_cfg *cfg,
1202 unsigned mp_count)
1203 {
1204 unsigned p, c;
1205
1206 for (p = 0; p < mp_count; ++p) {
1207 const unsigned b = (0x24 / 4) * p;
1208
1209 for (c = 0; c < cfg->num_counters; ++c) {
1210 if (q->data[b + 8] != q->sequence) {
1211 if (!wait)
1212 return false;
1213 if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
1214 return false;
1215 }
1216 count[p][c] = q->data[b + q->ctr[c]];
1217 }
1218 }
1219 return true;
1220 }
1221
/* Copy the per-MP counter values of a finished NVE4+ HW SM query out of the
 * query buffer into count[32][4].  Each MP writes a 0x60-byte (24-word)
 * record; words 20..23 hold per-slice sequence numbers.  Returns false if
 * the results are not ready yet (and waiting was not requested or failed).
 */
static inline bool
nve4_hw_sm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, bool wait,
                           struct nvc0_query *q,
                           const struct nvc0_hw_sm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c, d;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x60 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         count[p][c] = 0;
         /* Slots 0..3 (domain A) accumulate 4 sliced values; slots 4..7
          * (domain B, ctr & ~3 != 0) are read as a single value. */
         for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
            if (q->data[b + 20 + d] != q->sequence) {
               if (!wait)
                  return false;
               if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
                  return false;
            }
            if (q->ctr[c] & ~0x3)
               count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
            else
               count[p][c] += q->data[b + d * 4 + q->ctr[c]];
         }
      }
   }
   return true;
}
1252
1253 /* Metric calculations:
1254 * sum(x) ... sum of x over all MPs
1255 * avg(x) ... average of x over all MPs
1256 *
1257 * IPC : sum(inst_executed) / clock
1258 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
1259 * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
1260 * MP_EFFICIENCY : avg(active_cycles / clock)
1261 *
1262 * NOTE: Interpretation of IPC requires knowledge of MP count.
1263 */
1264 static boolean
1265 nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
1266 void *result, boolean wait)
1267 {
1268 uint32_t count[32][4];
1269 uint64_t value = 0;
1270 unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
1271 unsigned p, c;
1272 const struct nvc0_hw_sm_query_cfg *cfg;
1273 bool ret;
1274
1275 cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
1276
1277 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
1278 ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
1279 else
1280 ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
1281 if (!ret)
1282 return false;
1283
1284 if (cfg->op == NVC0_COUNTER_OPn_SUM) {
1285 for (c = 0; c < cfg->num_counters; ++c)
1286 for (p = 0; p < mp_count; ++p)
1287 value += count[p][c];
1288 value = (value * cfg->norm[0]) / cfg->norm[1];
1289 } else
1290 if (cfg->op == NVC0_COUNTER_OPn_OR) {
1291 uint32_t v = 0;
1292 for (c = 0; c < cfg->num_counters; ++c)
1293 for (p = 0; p < mp_count; ++p)
1294 v |= count[p][c];
1295 value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
1296 } else
1297 if (cfg->op == NVC0_COUNTER_OPn_AND) {
1298 uint32_t v = ~0;
1299 for (c = 0; c < cfg->num_counters; ++c)
1300 for (p = 0; p < mp_count; ++p)
1301 v &= count[p][c];
1302 value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
1303 } else
1304 if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
1305 uint64_t v[2] = { 0, 0 };
1306 for (p = 0; p < mp_count; ++p) {
1307 v[0] += count[p][0];
1308 v[1] += count[p][1];
1309 }
1310 if (v[0])
1311 value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
1312 } else
1313 if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
1314 for (p = 0; p < mp_count; ++p)
1315 value += count[p][0];
1316 if (count[0][1])
1317 value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
1318 else
1319 value = 0;
1320 } else
1321 if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
1322 unsigned mp_used = 0;
1323 for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
1324 if (count[p][1])
1325 value += (count[p][0] * cfg->norm[0]) / count[p][1];
1326 if (mp_used)
1327 value /= (uint64_t)mp_used * cfg->norm[1];
1328 } else
1329 if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
1330 unsigned mp_used = 0;
1331 for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
1332 value += count[p][0];
1333 if (count[0][1] && mp_used) {
1334 value *= cfg->norm[0];
1335 value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
1336 } else {
1337 value = 0;
1338 }
1339 }
1340
1341 *(uint64_t *)result = value;
1342 return true;
1343 }
1344
1345 int
1346 nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
1347 unsigned id,
1348 struct pipe_driver_query_info *info)
1349 {
1350 struct nvc0_screen *screen = nvc0_screen(pscreen);
1351 int count = 0;
1352
1353 count += NVC0_SW_QUERY_DRV_STAT_COUNT;
1354
1355 if (screen->base.device->drm_version >= 0x01000101) {
1356 if (screen->compute) {
1357 if (screen->base.class_3d == NVE4_3D_CLASS) {
1358 count += NVE4_HW_SM_QUERY_COUNT;
1359 } else
1360 if (screen->base.class_3d < NVE4_3D_CLASS) {
1361 /* NVC0_COMPUTE is not always enabled */
1362 count += NVC0_HW_SM_QUERY_COUNT;
1363 }
1364 }
1365 }
1366
1367 if (!info)
1368 return count;
1369
1370 /* Init default values. */
1371 info->name = "this_is_not_the_query_you_are_looking_for";
1372 info->query_type = 0xdeadd01d;
1373 info->max_value.u64 = 0;
1374 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
1375 info->group_id = -1;
1376
1377 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
1378 if (id < NVC0_SW_QUERY_DRV_STAT_COUNT) {
1379 info->name = nvc0_sw_query_drv_stat_names[id];
1380 info->query_type = NVC0_SW_QUERY_DRV_STAT(id);
1381 info->max_value.u64 = 0;
1382 if (strstr(info->name, "bytes"))
1383 info->type = PIPE_DRIVER_QUERY_TYPE_BYTES;
1384 info->group_id = NVC0_SW_QUERY_DRV_STAT_GROUP;
1385 return 1;
1386 } else
1387 #endif
1388 if (id < count) {
1389 if (screen->compute) {
1390 if (screen->base.class_3d == NVE4_3D_CLASS) {
1391 info->name = nve4_pm_query_names[id - NVC0_SW_QUERY_DRV_STAT_COUNT];
1392 info->query_type = NVE4_HW_SM_QUERY(id - NVC0_SW_QUERY_DRV_STAT_COUNT);
1393 info->max_value.u64 =
1394 (id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
1395 info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
1396 return 1;
1397 } else
1398 if (screen->base.class_3d < NVE4_3D_CLASS) {
1399 info->name = nvc0_pm_query_names[id - NVC0_SW_QUERY_DRV_STAT_COUNT];
1400 info->query_type = NVC0_HW_SM_QUERY(id - NVC0_SW_QUERY_DRV_STAT_COUNT);
1401 info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
1402 return 1;
1403 }
1404 }
1405 }
1406 /* user asked for info about non-existing query */
1407 return 0;
1408 }
1409
1410 int
1411 nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
1412 unsigned id,
1413 struct pipe_driver_query_group_info *info)
1414 {
1415 struct nvc0_screen *screen = nvc0_screen(pscreen);
1416 int count = 0;
1417
1418 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
1419 count++;
1420 #endif
1421
1422 if (screen->base.device->drm_version >= 0x01000101) {
1423 if (screen->compute) {
1424 if (screen->base.class_3d == NVE4_3D_CLASS) {
1425 count++;
1426 } else
1427 if (screen->base.class_3d < NVE4_3D_CLASS) {
1428 count++; /* NVC0_COMPUTE is not always enabled */
1429 }
1430 }
1431 }
1432
1433 if (!info)
1434 return count;
1435
1436 if (id == NVC0_QUERY_MP_COUNTER_GROUP) {
1437 if (screen->compute) {
1438 info->name = "MP counters";
1439 info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
1440
1441 if (screen->base.class_3d == NVE4_3D_CLASS) {
1442 info->num_queries = NVE4_HW_SM_QUERY_COUNT;
1443
1444 /* On NVE4+, each multiprocessor have 8 hardware counters separated
1445 * in two distinct domains, but we allow only one active query
1446 * simultaneously because some of them use more than one hardware
1447 * counter and this will result in an undefined behaviour. */
1448 info->max_active_queries = 1; /* TODO: handle multiple hw counters */
1449 return 1;
1450 } else
1451 if (screen->base.class_3d < NVE4_3D_CLASS) {
1452 info->num_queries = NVC0_HW_SM_QUERY_COUNT;
1453
1454 /* On NVC0:NVE4, each multiprocessor have 8 hardware counters
1455 * in a single domain. */
1456 info->max_active_queries = 8;
1457 return 1;
1458 }
1459 }
1460 }
1461 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
1462 else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
1463 info->name = "Driver statistics";
1464 info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
1465 info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
1466 info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
1467 return 1;
1468 }
1469 #endif
1470
1471 /* user asked for info about non-existing query group */
1472 info->name = "this_is_not_the_query_group_you_are_looking_for";
1473 info->max_active_queries = 0;
1474 info->num_queries = 0;
1475 info->type = 0;
1476 return 0;
1477 }
1478
1479 void
1480 nvc0_init_query_functions(struct nvc0_context *nvc0)
1481 {
1482 struct pipe_context *pipe = &nvc0->base.pipe;
1483
1484 pipe->create_query = nvc0_query_create;
1485 pipe->destroy_query = nvc0_query_destroy;
1486 pipe->begin_query = nvc0_query_begin;
1487 pipe->end_query = nvc0_query_end;
1488 pipe->get_query_result = nvc0_query_result;
1489 pipe->render_condition = nvc0_render_condition;
1490 }