2 * Copyright 2011 Nouveau Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Christoph Bumiller
25 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
27 #include "nvc0_context.h"
28 #include "nouveau/nv_object.xml.h"
29 #include "nve4_compute.xml.h"
31 #define NVC0_QUERY_STATE_READY 0
32 #define NVC0_QUERY_STATE_ACTIVE 1
33 #define NVC0_QUERY_STATE_ENDED 2
34 #define NVC0_QUERY_STATE_FLUSHED 3
42 struct nouveau_bo
*bo
;
44 uint32_t offset
; /* base + i * rotate */
48 int nesting
; /* only used for occlusion queries */
50 struct nouveau_mm_allocation
*mm
;
53 struct nouveau_fence
*fence
;
56 #define NVC0_QUERY_ALLOC_SPACE 256
58 static void nve4_mp_pm_query_begin(struct nvc0_context
*, struct nvc0_query
*);
59 static void nve4_mp_pm_query_end(struct nvc0_context
*, struct nvc0_query
*);
60 static boolean
nve4_mp_pm_query_result(struct nvc0_context
*,
61 struct nvc0_query
*, void *, boolean
);
63 static INLINE
struct nvc0_query
*
64 nvc0_query(struct pipe_query
*pipe
)
66 return (struct nvc0_query
*)pipe
;
70 nvc0_query_allocate(struct nvc0_context
*nvc0
, struct nvc0_query
*q
, int size
)
72 struct nvc0_screen
*screen
= nvc0
->screen
;
76 nouveau_bo_ref(NULL
, &q
->bo
);
78 if (q
->state
== NVC0_QUERY_STATE_READY
)
79 nouveau_mm_free(q
->u
.mm
);
81 nouveau_fence_work(screen
->base
.fence
.current
,
82 nouveau_mm_free_work
, q
->u
.mm
);
86 q
->u
.mm
= nouveau_mm_allocate(screen
->base
.mm_GART
, size
, &q
->bo
, &q
->base
);
91 ret
= nouveau_bo_map(q
->bo
, 0, screen
->base
.client
);
93 nvc0_query_allocate(nvc0
, q
, 0);
96 q
->data
= (uint32_t *)((uint8_t *)q
->bo
->map
+ q
->base
);
102 nvc0_query_destroy(struct pipe_context
*pipe
, struct pipe_query
*pq
)
104 nvc0_query_allocate(nvc0_context(pipe
), nvc0_query(pq
), 0);
105 nouveau_fence_ref(NULL
, &nvc0_query(pq
)->fence
);
106 FREE(nvc0_query(pq
));
109 static struct pipe_query
*
110 nvc0_query_create(struct pipe_context
*pipe
, unsigned type
)
112 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
113 struct nvc0_query
*q
;
114 unsigned space
= NVC0_QUERY_ALLOC_SPACE
;
116 q
= CALLOC_STRUCT(nvc0_query
);
121 case PIPE_QUERY_OCCLUSION_COUNTER
:
122 case PIPE_QUERY_OCCLUSION_PREDICATE
:
124 space
= NVC0_QUERY_ALLOC_SPACE
;
126 case PIPE_QUERY_PIPELINE_STATISTICS
:
130 case PIPE_QUERY_SO_STATISTICS
:
131 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
135 case PIPE_QUERY_PRIMITIVES_GENERATED
:
136 case PIPE_QUERY_PRIMITIVES_EMITTED
:
140 case PIPE_QUERY_TIME_ELAPSED
:
141 case PIPE_QUERY_TIMESTAMP
:
142 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
143 case PIPE_QUERY_GPU_FINISHED
:
146 case NVC0_QUERY_TFB_BUFFER_OFFSET
:
150 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
151 if (type
>= NVC0_QUERY_DRV_STAT(0) && type
<= NVC0_QUERY_DRV_STAT_LAST
) {
154 q
->index
= type
- NVC0_QUERY_DRV_STAT(0);
158 if (nvc0
->screen
->base
.class_3d
>= NVE4_3D_CLASS
&&
159 nvc0
->screen
->base
.device
->drm_version
>= 0x01000101) {
160 if (type
>= NVE4_PM_QUERY(0) &&
161 type
<= NVE4_PM_QUERY_LAST
) {
183 * [50] = WS0.sequence
184 * [54] = WS1.sequence
185 * [58] = WS2.sequence
186 * [5c] = WS3.sequence
188 space
= (4 * 4 + 4 + 4) * nvc0
->screen
->mp_count
* sizeof(uint32_t);
192 debug_printf("invalid query type: %u\n", type
);
196 if (!nvc0_query_allocate(nvc0
, q
, space
)) {
204 /* we advance before query_begin ! */
205 q
->offset
-= q
->rotate
;
206 q
->data
-= q
->rotate
/ sizeof(*q
->data
);
209 q
->data
[0] = 0; /* initialize sequence */
211 return (struct pipe_query
*)q
;
215 nvc0_query_get(struct nouveau_pushbuf
*push
, struct nvc0_query
*q
,
216 unsigned offset
, uint32_t get
)
221 PUSH_REFN (push
, q
->bo
, NOUVEAU_BO_GART
| NOUVEAU_BO_WR
);
222 BEGIN_NVC0(push
, NVC0_3D(QUERY_ADDRESS_HIGH
), 4);
223 PUSH_DATAh(push
, q
->bo
->offset
+ offset
);
224 PUSH_DATA (push
, q
->bo
->offset
+ offset
);
225 PUSH_DATA (push
, q
->sequence
);
226 PUSH_DATA (push
, get
);
230 nvc0_query_rotate(struct nvc0_context
*nvc0
, struct nvc0_query
*q
)
232 q
->offset
+= q
->rotate
;
233 q
->data
+= q
->rotate
/ sizeof(*q
->data
);
234 if (q
->offset
- q
->base
== NVC0_QUERY_ALLOC_SPACE
)
235 nvc0_query_allocate(nvc0
, q
, NVC0_QUERY_ALLOC_SPACE
);
239 nvc0_query_begin(struct pipe_context
*pipe
, struct pipe_query
*pq
)
241 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
242 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
243 struct nvc0_query
*q
= nvc0_query(pq
);
245 /* For occlusion queries we have to change the storage, because a previous
246 * query might set the initial render condition to FALSE even *after* we re-
247 * initialized it to TRUE.
250 nvc0_query_rotate(nvc0
, q
);
252 /* XXX: can we do this with the GPU, and sync with respect to a previous
255 q
->data
[0] = q
->sequence
; /* initialize sequence */
256 q
->data
[1] = 1; /* initial render condition = TRUE */
257 q
->data
[4] = q
->sequence
+ 1; /* for comparison COND_MODE */
263 case PIPE_QUERY_OCCLUSION_COUNTER
:
264 case PIPE_QUERY_OCCLUSION_PREDICATE
:
265 q
->nesting
= nvc0
->screen
->num_occlusion_queries_active
++;
267 nvc0_query_get(push
, q
, 0x10, 0x0100f002);
270 BEGIN_NVC0(push
, NVC0_3D(COUNTER_RESET
), 1);
271 PUSH_DATA (push
, NVC0_3D_COUNTER_RESET_SAMPLECNT
);
272 IMMED_NVC0(push
, NVC0_3D(SAMPLECNT_ENABLE
), 1);
275 case PIPE_QUERY_PRIMITIVES_GENERATED
:
276 nvc0_query_get(push
, q
, 0x10, 0x09005002 | (q
->index
<< 5));
278 case PIPE_QUERY_PRIMITIVES_EMITTED
:
279 nvc0_query_get(push
, q
, 0x10, 0x05805002 | (q
->index
<< 5));
281 case PIPE_QUERY_SO_STATISTICS
:
282 nvc0_query_get(push
, q
, 0x20, 0x05805002 | (q
->index
<< 5));
283 nvc0_query_get(push
, q
, 0x30, 0x06805002 | (q
->index
<< 5));
285 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
286 nvc0_query_get(push
, q
, 0x10, 0x03005002 | (q
->index
<< 5));
288 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
289 case PIPE_QUERY_TIME_ELAPSED
:
290 nvc0_query_get(push
, q
, 0x10, 0x00005002);
292 case PIPE_QUERY_PIPELINE_STATISTICS
:
293 nvc0_query_get(push
, q
, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
294 nvc0_query_get(push
, q
, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
295 nvc0_query_get(push
, q
, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
296 nvc0_query_get(push
, q
, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
297 nvc0_query_get(push
, q
, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
298 nvc0_query_get(push
, q
, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
299 nvc0_query_get(push
, q
, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
300 nvc0_query_get(push
, q
, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
301 nvc0_query_get(push
, q
, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
302 nvc0_query_get(push
, q
, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
305 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
306 if (q
->type
>= NVC0_QUERY_DRV_STAT(0) &&
307 q
->type
<= NVC0_QUERY_DRV_STAT_LAST
) {
309 q
->u
.value
= nvc0
->screen
->base
.stats
.v
[q
->index
];
314 if (q
->type
>= NVE4_PM_QUERY(0) && q
->type
<= NVE4_PM_QUERY_LAST
) {
315 nve4_mp_pm_query_begin(nvc0
, q
);
319 q
->state
= NVC0_QUERY_STATE_ACTIVE
;
323 nvc0_query_end(struct pipe_context
*pipe
, struct pipe_query
*pq
)
325 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
326 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
327 struct nvc0_query
*q
= nvc0_query(pq
);
329 if (q
->state
!= NVC0_QUERY_STATE_ACTIVE
) {
330 /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
332 nvc0_query_rotate(nvc0
, q
);
335 q
->state
= NVC0_QUERY_STATE_ENDED
;
338 case PIPE_QUERY_OCCLUSION_COUNTER
:
339 case PIPE_QUERY_OCCLUSION_PREDICATE
:
340 nvc0_query_get(push
, q
, 0, 0x0100f002);
341 if (--nvc0
->screen
->num_occlusion_queries_active
== 0) {
343 IMMED_NVC0(push
, NVC0_3D(SAMPLECNT_ENABLE
), 0);
346 case PIPE_QUERY_PRIMITIVES_GENERATED
:
347 nvc0_query_get(push
, q
, 0, 0x09005002 | (q
->index
<< 5));
349 case PIPE_QUERY_PRIMITIVES_EMITTED
:
350 nvc0_query_get(push
, q
, 0, 0x05805002 | (q
->index
<< 5));
352 case PIPE_QUERY_SO_STATISTICS
:
353 nvc0_query_get(push
, q
, 0x00, 0x05805002 | (q
->index
<< 5));
354 nvc0_query_get(push
, q
, 0x10, 0x06805002 | (q
->index
<< 5));
356 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
357 /* TODO: How do we sum over all streams for render condition ? */
358 /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
359 nvc0_query_get(push
, q
, 0x00, 0x03005002 | (q
->index
<< 5));
360 nvc0_query_get(push
, q
, 0x20, 0x00005002);
362 case PIPE_QUERY_TIMESTAMP
:
363 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
364 case PIPE_QUERY_TIME_ELAPSED
:
365 nvc0_query_get(push
, q
, 0, 0x00005002);
367 case PIPE_QUERY_GPU_FINISHED
:
368 nvc0_query_get(push
, q
, 0, 0x1000f010);
370 case PIPE_QUERY_PIPELINE_STATISTICS
:
371 nvc0_query_get(push
, q
, 0x00, 0x00801002); /* VFETCH, VERTICES */
372 nvc0_query_get(push
, q
, 0x10, 0x01801002); /* VFETCH, PRIMS */
373 nvc0_query_get(push
, q
, 0x20, 0x02802002); /* VP, LAUNCHES */
374 nvc0_query_get(push
, q
, 0x30, 0x03806002); /* GP, LAUNCHES */
375 nvc0_query_get(push
, q
, 0x40, 0x04806002); /* GP, PRIMS_OUT */
376 nvc0_query_get(push
, q
, 0x50, 0x07804002); /* RAST, PRIMS_IN */
377 nvc0_query_get(push
, q
, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
378 nvc0_query_get(push
, q
, 0x70, 0x0980a002); /* ROP, PIXELS */
379 nvc0_query_get(push
, q
, 0x80, 0x0d808002); /* TCP, LAUNCHES */
380 nvc0_query_get(push
, q
, 0x90, 0x0e809002); /* TEP, LAUNCHES */
382 case NVC0_QUERY_TFB_BUFFER_OFFSET
:
383 /* indexed by TFB buffer instead of by vertex stream */
384 nvc0_query_get(push
, q
, 0x00, 0x0d005002 | (q
->index
<< 5));
387 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
388 if (q
->type
>= NVC0_QUERY_DRV_STAT(0) &&
389 q
->type
<= NVC0_QUERY_DRV_STAT_LAST
) {
390 q
->u
.value
= nvc0
->screen
->base
.stats
.v
[q
->index
] - q
->u
.value
;
394 if (q
->type
>= NVE4_PM_QUERY(0) && q
->type
<= NVE4_PM_QUERY_LAST
)
395 nve4_mp_pm_query_end(nvc0
, q
);
399 nouveau_fence_ref(nvc0
->screen
->base
.fence
.current
, &q
->fence
);
403 nvc0_query_update(struct nouveau_client
*cli
, struct nvc0_query
*q
)
406 if (nouveau_fence_signalled(q
->fence
))
407 q
->state
= NVC0_QUERY_STATE_READY
;
409 if (q
->data
[0] == q
->sequence
)
410 q
->state
= NVC0_QUERY_STATE_READY
;
415 nvc0_query_result(struct pipe_context
*pipe
, struct pipe_query
*pq
,
416 boolean wait
, union pipe_query_result
*result
)
418 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
419 struct nvc0_query
*q
= nvc0_query(pq
);
420 uint64_t *res64
= (uint64_t*)result
;
421 uint32_t *res32
= (uint32_t*)result
;
422 boolean
*res8
= (boolean
*)result
;
423 uint64_t *data64
= (uint64_t *)q
->data
;
426 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
427 if (q
->type
>= NVC0_QUERY_DRV_STAT(0) &&
428 q
->type
<= NVC0_QUERY_DRV_STAT_LAST
) {
429 res64
[0] = q
->u
.value
;
433 if (q
->type
>= NVE4_PM_QUERY(0) && q
->type
<= NVE4_PM_QUERY_LAST
) {
434 return nve4_mp_pm_query_result(nvc0
, q
, result
, wait
);
437 if (q
->state
!= NVC0_QUERY_STATE_READY
)
438 nvc0_query_update(nvc0
->screen
->base
.client
, q
);
440 if (q
->state
!= NVC0_QUERY_STATE_READY
) {
442 if (q
->state
!= NVC0_QUERY_STATE_FLUSHED
) {
443 q
->state
= NVC0_QUERY_STATE_FLUSHED
;
444 /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
445 PUSH_KICK(nvc0
->base
.pushbuf
);
449 if (nouveau_bo_wait(q
->bo
, NOUVEAU_BO_RD
, nvc0
->screen
->base
.client
))
451 NOUVEAU_DRV_STAT(&nvc0
->screen
->base
, query_sync_count
, 1);
453 q
->state
= NVC0_QUERY_STATE_READY
;
456 case PIPE_QUERY_GPU_FINISHED
:
459 case PIPE_QUERY_OCCLUSION_COUNTER
: /* u32 sequence, u32 count, u64 time */
460 res64
[0] = q
->data
[1] - q
->data
[5];
462 case PIPE_QUERY_OCCLUSION_PREDICATE
:
463 res8
[0] = q
->data
[1] != q
->data
[5];
465 case PIPE_QUERY_PRIMITIVES_GENERATED
: /* u64 count, u64 time */
466 case PIPE_QUERY_PRIMITIVES_EMITTED
: /* u64 count, u64 time */
467 res64
[0] = data64
[0] - data64
[2];
469 case PIPE_QUERY_SO_STATISTICS
:
470 res64
[0] = data64
[0] - data64
[4];
471 res64
[1] = data64
[2] - data64
[6];
473 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
474 res8
[0] = data64
[0] != data64
[2];
476 case PIPE_QUERY_TIMESTAMP
:
477 res64
[0] = data64
[1];
479 case PIPE_QUERY_TIMESTAMP_DISJOINT
: /* u32 sequence, u32 0, u64 time */
480 res64
[0] = 1000000000;
481 res8
[8] = (data64
[1] == data64
[3]) ? FALSE
: TRUE
;
483 case PIPE_QUERY_TIME_ELAPSED
:
484 res64
[0] = data64
[1] - data64
[3];
486 case PIPE_QUERY_PIPELINE_STATISTICS
:
487 for (i
= 0; i
< 10; ++i
)
488 res64
[i
] = data64
[i
* 2] - data64
[24 + i
* 2];
490 case NVC0_QUERY_TFB_BUFFER_OFFSET
:
491 res32
[0] = q
->data
[1];
494 assert(0); /* can't happen, we don't create queries with invalid type */
502 nvc0_query_fifo_wait(struct nouveau_pushbuf
*push
, struct pipe_query
*pq
)
504 struct nvc0_query
*q
= nvc0_query(pq
);
505 unsigned offset
= q
->offset
;
507 if (q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
) offset
+= 0x20;
510 PUSH_REFN (push
, q
->bo
, NOUVEAU_BO_GART
| NOUVEAU_BO_RD
);
511 BEGIN_NVC0(push
, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH
), 4);
512 PUSH_DATAh(push
, q
->bo
->offset
+ offset
);
513 PUSH_DATA (push
, q
->bo
->offset
+ offset
);
514 PUSH_DATA (push
, q
->sequence
);
515 PUSH_DATA (push
, (1 << 12) |
516 NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL
);
520 nvc0_render_condition(struct pipe_context
*pipe
,
521 struct pipe_query
*pq
, uint mode
)
523 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
524 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
525 struct nvc0_query
*q
;
527 boolean negated
= FALSE
;
529 mode
!= PIPE_RENDER_COND_NO_WAIT
&&
530 mode
!= PIPE_RENDER_COND_BY_REGION_NO_WAIT
;
532 nvc0
->cond_query
= pq
;
533 nvc0
->cond_mode
= mode
;
537 IMMED_NVC0(push
, NVC0_3D(COND_MODE
), NVC0_3D_COND_MODE_ALWAYS
);
542 /* NOTE: comparison of 2 queries only works if both have completed */
544 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
545 cond
= negated
? NVC0_3D_COND_MODE_EQUAL
:
546 NVC0_3D_COND_MODE_NOT_EQUAL
;
549 case PIPE_QUERY_OCCLUSION_COUNTER
:
550 case PIPE_QUERY_OCCLUSION_PREDICATE
:
551 if (likely(!negated
)) {
552 if (unlikely(q
->nesting
))
553 cond
= wait
? NVC0_3D_COND_MODE_NOT_EQUAL
:
554 NVC0_3D_COND_MODE_ALWAYS
;
556 cond
= NVC0_3D_COND_MODE_RES_NON_ZERO
;
558 cond
= wait
? NVC0_3D_COND_MODE_EQUAL
: NVC0_3D_COND_MODE_ALWAYS
;
562 assert(!"render condition query not a predicate");
563 mode
= NVC0_3D_COND_MODE_ALWAYS
;
568 nvc0_query_fifo_wait(push
, pq
);
571 PUSH_REFN (push
, q
->bo
, NOUVEAU_BO_GART
| NOUVEAU_BO_RD
);
572 BEGIN_NVC0(push
, NVC0_3D(COND_ADDRESS_HIGH
), 3);
573 PUSH_DATAh(push
, q
->bo
->offset
+ q
->offset
);
574 PUSH_DATA (push
, q
->bo
->offset
+ q
->offset
);
575 PUSH_DATA (push
, cond
);
579 nvc0_query_pushbuf_submit(struct nouveau_pushbuf
*push
,
580 struct pipe_query
*pq
, unsigned result_offset
)
582 struct nvc0_query
*q
= nvc0_query(pq
);
584 #define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
586 nouveau_pushbuf_space(push
, 0, 0, 1);
587 nouveau_pushbuf_data(push
, q
->bo
, q
->offset
+ result_offset
, 4 |
588 NVC0_IB_ENTRY_1_NO_PREFETCH
);
592 nvc0_so_target_save_offset(struct pipe_context
*pipe
,
593 struct pipe_stream_output_target
*ptarg
,
594 unsigned index
, boolean
*serialize
)
596 struct nvc0_so_target
*targ
= nvc0_so_target(ptarg
);
600 PUSH_SPACE(nvc0_context(pipe
)->base
.pushbuf
, 1);
601 IMMED_NVC0(nvc0_context(pipe
)->base
.pushbuf
, NVC0_3D(SERIALIZE
), 0);
603 NOUVEAU_DRV_STAT(nouveau_screen(pipe
->screen
), gpu_serialize_count
, 1);
606 nvc0_query(targ
->pq
)->index
= index
;
608 nvc0_query_end(pipe
, targ
->pq
);
612 /* === DRIVER STATISTICS === */
614 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
616 static const char *nvc0_drv_stat_names
[] =
618 "drv-tex_obj_current_count",
619 "drv-tex_obj_current_bytes",
620 "drv-buf_obj_current_count",
621 "drv-buf_obj_current_bytes_vid",
622 "drv-buf_obj_current_bytes_sys",
623 "drv-tex_transfers_rd",
624 "drv-tex_transfers_wr",
625 "drv-tex_copy_count",
626 "drv-tex_blit_count",
627 "drv-tex_cache_flush_count",
628 "drv-buf_transfers_rd",
629 "drv-buf_transfers_wr",
630 "drv-buf_read_bytes_staging_vid",
631 "drv-buf_write_bytes_direct",
632 "drv-buf_write_bytes_staging_vid",
633 "drv-buf_write_bytes_staging_sys",
634 "drv-buf_copy_bytes",
635 "drv-buf_non_kernel_fence_sync_count",
636 "drv-any_non_kernel_fence_sync_count",
637 "drv-query_sync_count",
638 "drv-gpu_serialize_count",
639 "drv-draw_calls_array",
640 "drv-draw_calls_indexed",
641 "drv-draw_calls_fallback_count",
642 "drv-user_buffer_upload_bytes",
643 "drv-constbuf_upload_count",
644 "drv-constbuf_upload_bytes",
646 "drv-resource_validate_count"
649 #endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
652 /* === PERFORMANCE MONITORING COUNTERS === */
654 /* Code to read out MP counters: They are accessible via mmio, too, but let's
655 * just avoid mapping registers in userspace. We'd have to know which MPs are
656 * enabled/present, too, and that information is not presently exposed.
657 * We could add a kernel interface for it, but reading the counters like this
658 * has the advantage of being async (if get_result isn't called immediately).
660 static const uint64_t nve4_read_mp_pm_counters_code
[] =
662 /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
664 * mov b32 $r12 $physid
670 * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
674 * set $p0 0x1 eq u32 $r8 0x0
675 * mov b32 $r10 c0[0x0]
676 * ext u32 $r8 $r12 0x414
677 * mov b32 $r11 c0[0x4]
678 * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
679 * ext u32 $r9 $r12 0x208
681 * set $p1 0x1 eq u32 $r9 0x0
682 * mul $r8 u32 $r8 u32 96
683 * mul $r12 u32 $r9 u32 16
684 * mul $r13 u32 $r9 u32 4
685 * add b32 $r9 $r8 $r13
686 * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
687 * add b32 $r8 $r8 $r12
689 * add b32 $r10 $c $r10 $r8
691 * add b32 $r11 $r11 0x0 $c
692 * add b32 $r12 $c $r12 $r9
693 * st b128 wt g[$r10d] $r0q
694 * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
695 * mov b32 $r0 c0[0x8]
696 * add b32 $r13 $r13 0x0 $c
697 * $p1 st b128 wt g[$r12d+0x40] $r4q
698 * st b32 wt g[$r12d+0x50] $r0
700 0x2202020202020207ULL
,
701 0x2c00000084021c04ULL
,
702 0x2c0000000c031c04ULL
,
703 0x2c00000010001c04ULL
,
704 0x2c00000014005c04ULL
,
705 0x2c00000018009c04ULL
,
706 0x2c0000001c00dc04ULL
,
707 0x2c00000020011c04ULL
,
708 0x22b0420042320207ULL
,
709 0x2c00000024015c04ULL
,
710 0x2c00000028019c04ULL
,
711 0x2c0000002c01dc04ULL
,
712 0x190e0000fc81dc03ULL
,
713 0x2800400000029de4ULL
,
714 0x7000c01050c21c03ULL
,
715 0x280040001002dde4ULL
,
716 0x204282020042e047ULL
,
717 0x7000c00820c25c03ULL
,
718 0x80000000000021e7ULL
,
719 0x190e0000fc93dc03ULL
,
720 0x1000000180821c02ULL
,
721 0x1000000040931c02ULL
,
722 0x1000000010935c02ULL
,
723 0x4800000034825c03ULL
,
724 0x22c042c042c04287ULL
,
725 0x4800000030821c03ULL
,
726 0x2800000028031de4ULL
,
727 0x4801000020a29c03ULL
,
728 0x280000002c035de4ULL
,
729 0x0800000000b2dc42ULL
,
730 0x4801000024c31c03ULL
,
731 0x9400000000a01fc5ULL
,
732 0x200002e04202c047ULL
,
733 0x2800400020001de4ULL
,
734 0x0800000000d35c42ULL
,
735 0x9400000100c107c5ULL
,
736 0x9400000140c01f85ULL
,
737 0x8000000000001de7ULL
740 /* NOTE: intentionally using the same names as NV */
741 static const char *nve4_pm_query_names
[] =
763 "l1_local_load_miss",
764 "l1_local_store_hit",
765 "l1_local_store_miss",
768 "l1_global_load_hit",
769 "l1_global_load_miss",
770 "uncached_global_load_transaction",
771 "global_store_transaction",
777 /* metrics, i.e. functions of the MP counters */
778 "metric-ipc", /* inst_executed, clock */
779 "metric-ipac", /* inst_executed, active_cycles */
780 "metric-ipec", /* inst_executed, (bool)inst_executed */
781 "metric-achieved_occupancy", /* active_warps, active_cycles */
782 "metric-sm_efficiency", /* active_cycles, clock */
783 "metric-inst_replay_overhead" /* inst_issued, inst_executed */
786 /* For simplicity, we will allocate as many group slots as we allocate counter
787 * slots. This means that a single counter which wants to source from 2 groups
788 * will have to be declared as using 2 counter slots. This shouldn't really be
789 * a problem because such queries don't make much sense ... (unless someone is
792 struct nve4_mp_counter_cfg
794 uint32_t func
: 16; /* mask or 4-bit logic op (depending on mode) */
795 uint32_t mode
: 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
797 uint32_t sig_dom
: 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
798 uint32_t sig_sel
: 8; /* signal group */
799 uint32_t src_sel
: 32; /* signal selection for up to 5 sources */
802 #define NVE4_COUNTER_OPn_SUM 0
803 #define NVE4_COUNTER_OPn_OR 1
804 #define NVE4_COUNTER_OPn_AND 2
805 #define NVE4_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
806 #define NVE4_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
807 #define NVE4_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
808 #define NVE4_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
810 struct nve4_mp_pm_query_cfg
812 struct nve4_mp_counter_cfg ctr
[4];
813 uint8_t num_counters
;
815 uint8_t norm
[2]; /* normalization num,denom */
818 #define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVE4_COUNTER_OPn_SUM, { nu, dn } }
819 #define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVE4_COUNTER_OPn_SUM, { nu, dn } }
820 #define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
821 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
822 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
823 {}, {}, }, 2, NVE4_COUNTER_OP2_##o, { nu, dn } }
824 #define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
825 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
826 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
827 {}, {}, }, 2, NVE4_COUNTER_OP2_##o, { nu, dn } }
828 #define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
829 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
830 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
831 {}, {}, }, 2, NVE4_COUNTER_OP2_##o, { nu, dn } }
834 * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
835 * inst_executed etc.: we only count a single warp scheduler
836 * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
837 * this is inaccurate !
839 static const struct nve4_mp_pm_query_cfg nve4_mp_pm_queries
[] =
841 _Q1A(PROF_TRIGGER_0
, 0x0001, B6
, USER
, 0x00000000, 1, 1),
842 _Q1A(PROF_TRIGGER_1
, 0x0001, B6
, USER
, 0x00000004, 1, 1),
843 _Q1A(PROF_TRIGGER_2
, 0x0001, B6
, USER
, 0x00000008, 1, 1),
844 _Q1A(PROF_TRIGGER_3
, 0x0001, B6
, USER
, 0x0000000c, 1, 1),
845 _Q1A(PROF_TRIGGER_4
, 0x0001, B6
, USER
, 0x00000010, 1, 1),
846 _Q1A(PROF_TRIGGER_5
, 0x0001, B6
, USER
, 0x00000014, 1, 1),
847 _Q1A(PROF_TRIGGER_6
, 0x0001, B6
, USER
, 0x00000018, 1, 1),
848 _Q1A(PROF_TRIGGER_7
, 0x0001, B6
, USER
, 0x0000001c, 1, 1),
849 _Q1A(LAUNCHED_WARPS
, 0x0001, B6
, LAUNCH
, 0x00000004, 1, 1),
850 _Q1A(LAUNCHED_THREADS
, 0x003f, B6
, LAUNCH
, 0x398a4188, 1, 1),
851 _Q1B(LAUNCHED_CTA
, 0x0001, B6
, WARP
, 0x0000001c, 1, 1),
852 _Q1A(INST_ISSUED1
, 0x0001, B6
, ISSUE
, 0x00000004, 1, 1),
853 _Q1A(INST_ISSUED2
, 0x0001, B6
, ISSUE
, 0x00000008, 1, 1),
854 _Q1A(INST_ISSUED
, 0x0003, B6
, ISSUE
, 0x00000104, 1, 1),
855 _Q1A(INST_EXECUTED
, 0x0003, B6
, EXEC
, 0x00000398, 1, 1),
856 _Q1A(LD_SHARED
, 0x0001, B6
, LDST
, 0x00000000, 1, 1),
857 _Q1A(ST_SHARED
, 0x0001, B6
, LDST
, 0x00000004, 1, 1),
858 _Q1A(LD_LOCAL
, 0x0001, B6
, LDST
, 0x00000008, 1, 1),
859 _Q1A(ST_LOCAL
, 0x0001, B6
, LDST
, 0x0000000c, 1, 1),
860 _Q1A(GLD_REQUEST
, 0x0001, B6
, LDST
, 0x00000010, 1, 1),
861 _Q1A(GST_REQUEST
, 0x0001, B6
, LDST
, 0x00000014, 1, 1),
862 _Q1B(L1_LOCAL_LOAD_HIT
, 0x0001, B6
, L1
, 0x00000000, 1, 1),
863 _Q1B(L1_LOCAL_LOAD_MISS
, 0x0001, B6
, L1
, 0x00000004, 1, 1),
864 _Q1B(L1_LOCAL_STORE_HIT
, 0x0001, B6
, L1
, 0x00000008, 1, 1),
865 _Q1B(L1_LOCAL_STORE_MISS
, 0x0001, B6
, L1
, 0x0000000c, 1, 1),
866 _Q1B(L1_GLOBAL_LOAD_HIT
, 0x0001, B6
, L1
, 0x00000010, 1, 1),
867 _Q1B(L1_GLOBAL_LOAD_MISS
, 0x0001, B6
, L1
, 0x00000014, 1, 1),
868 _Q1B(GLD_TRANSACTIONS_UNCACHED
, 0x0001, B6
, MEM
, 0x00000000, 1, 1),
869 _Q1B(GST_TRANSACTIONS
, 0x0001, B6
, MEM
, 0x00000004, 1, 1),
870 _Q1A(BRANCH
, 0x0001, B6
, BRANCH
, 0x0000000c, 1, 1),
871 _Q1A(BRANCH_DIVERGENT
, 0x0001, B6
, BRANCH
, 0x00000010, 1, 1),
872 _Q1B(ACTIVE_WARPS
, 0x003f, B6
, WARP
, 0x31483104, 2, 1),
873 _Q1B(ACTIVE_CYCLES
, 0x0001, B6
, WARP
, 0x00000000, 1, 1),
874 _M2AB(IPC
, 0x3, B6
, EXEC
, 0x398, 0xffff, LOGOP
, WARP
, 0x0, DIV_SUM_M0
, 10, 1),
875 _M2AB(IPAC
, 0x3, B6
, EXEC
, 0x398, 0x1, B6
, WARP
, 0x0, AVG_DIV_MM
, 10, 1),
876 _M2A(IPEC
, 0x3, B6
, EXEC
, 0x398, 0xe, LOGOP
, EXEC
, 0x398, AVG_DIV_MM
, 10, 1),
877 _M2A(INST_REPLAY_OHEAD
, 0x3, B6
, ISSUE
, 0x104, 0x3, B6
, EXEC
, 0x398, REL_SUM_MM
, 100, 1),
878 _M2B(MP_OCCUPANCY
, 0x3f, B6
, WARP
, 0x31483104, 0x01, B6
, WARP
, 0x0, AVG_DIV_MM
, 200, 64),
879 _M2B(MP_EFFICIENCY
, 0x01, B6
, WARP
, 0x0, 0xffff, LOGOP
, WARP
, 0x0, AVG_DIV_M0
, 100, 1),
888 nve4_mp_pm_query_begin(struct nvc0_context
*nvc0
, struct nvc0_query
*q
)
890 struct nvc0_screen
*screen
= nvc0
->screen
;
891 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
892 const struct nve4_mp_pm_query_cfg
*cfg
;
894 unsigned num_ab
[2] = { 0, 0 };
896 cfg
= &nve4_mp_pm_queries
[q
->type
- PIPE_QUERY_DRIVER_SPECIFIC
];
898 /* check if we have enough free counter slots */
899 for (i
= 0; i
< cfg
->num_counters
; ++i
)
900 num_ab
[cfg
->ctr
[i
].sig_dom
]++;
902 if (screen
->pm
.num_mp_pm_active
[0] + num_ab
[0] > 4 ||
903 screen
->pm
.num_mp_pm_active
[1] + num_ab
[1] > 4) {
904 NOUVEAU_ERR("Not enough free MP counter slots !\n");
908 assert(cfg
->num_counters
<= 4);
909 PUSH_SPACE(push
, 4 * 8 + 6);
911 if (!screen
->pm
.mp_counters_enabled
) {
912 screen
->pm
.mp_counters_enabled
= TRUE
;
913 BEGIN_NVC0(push
, SUBC_SW(0x06ac), 1);
914 PUSH_DATA (push
, 0x1fcb);
917 /* set sequence field to 0 (used to check if result is available) */
918 for (i
= 0; i
< screen
->mp_count
; ++i
)
919 q
->data
[i
* 10 + 10] = 0;
921 for (i
= 0; i
< cfg
->num_counters
; ++i
) {
922 const unsigned d
= cfg
->ctr
[i
].sig_dom
;
924 if (!screen
->pm
.num_mp_pm_active
[d
]) {
925 uint32_t m
= (1 << 22) | (1 << (7 + (8 * !d
)));
926 if (screen
->pm
.num_mp_pm_active
[!d
])
927 m
|= 1 << (7 + (8 * d
));
928 BEGIN_NVC0(push
, SUBC_SW(0x0600), 1);
931 screen
->pm
.num_mp_pm_active
[d
]++;
933 for (c
= d
* 4; c
< (d
* 4 + 4); ++c
) {
934 if (!screen
->pm
.mp_counter
[c
]) {
936 screen
->pm
.mp_counter
[c
] = (struct pipe_query
*)q
;
940 assert(c
<= (d
* 4 + 3)); /* must succeed, already checked for space */
942 /* configure and reset the counter(s) */
944 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_A_SIGSEL(c
& 3)), 1);
946 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_B_SIGSEL(c
& 3)), 1);
947 PUSH_DATA (push
, cfg
->ctr
[i
].sig_sel
);
948 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_SRCSEL(c
)), 1);
949 PUSH_DATA (push
, cfg
->ctr
[i
].src_sel
+ 0x2108421 * (c
& 3));
950 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_FUNC(c
)), 1);
951 PUSH_DATA (push
, (cfg
->ctr
[i
].func
<< 4) | cfg
->ctr
[i
].mode
);
952 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_SET(c
)), 1);
958 nve4_mp_pm_query_end(struct nvc0_context
*nvc0
, struct nvc0_query
*q
)
960 struct nvc0_screen
*screen
= nvc0
->screen
;
961 struct pipe_context
*pipe
= &nvc0
->base
.pipe
;
962 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
965 const uint block
[3] = { 32, 4, 1 };
966 const uint grid
[3] = { screen
->mp_count
, 1, 1 };
968 const struct nve4_mp_pm_query_cfg
*cfg
;
970 cfg
= &nve4_mp_pm_queries
[q
->type
- PIPE_QUERY_DRIVER_SPECIFIC
];
972 if (unlikely(!screen
->pm
.prog
)) {
973 struct nvc0_program
*prog
= CALLOC_STRUCT(nvc0_program
);
974 prog
->type
= PIPE_SHADER_COMPUTE
;
975 prog
->translated
= TRUE
;
977 prog
->code
= (uint32_t *)nve4_read_mp_pm_counters_code
;
978 prog
->code_size
= sizeof(nve4_read_mp_pm_counters_code
);
979 prog
->parm_size
= 12;
980 screen
->pm
.prog
= prog
;
983 /* disable all counting */
985 for (c
= 0; c
< 8; ++c
)
986 if (screen
->pm
.mp_counter
[c
])
987 IMMED_NVC0(push
, NVE4_COMPUTE(MP_PM_FUNC(c
)), 0);
988 /* release counters for this query */
989 for (c
= 0; c
< 8; ++c
) {
990 if (nvc0_query(screen
->pm
.mp_counter
[c
]) == q
) {
991 screen
->pm
.num_mp_pm_active
[c
/ 4]--;
992 screen
->pm
.mp_counter
[c
] = NULL
;
996 BCTX_REFN_bo(nvc0
->bufctx_cp
, CP_QUERY
, NOUVEAU_BO_GART
| NOUVEAU_BO_WR
,
1000 IMMED_NVC0(push
, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE
), 0);
1002 pipe
->bind_compute_state(pipe
, screen
->pm
.prog
);
1003 input
[0] = (q
->bo
->offset
+ q
->base
);
1004 input
[1] = (q
->bo
->offset
+ q
->base
) >> 32;
1005 input
[2] = q
->sequence
;
1006 pipe
->launch_grid(pipe
, block
, grid
, 0, input
);
1008 nouveau_bufctx_reset(nvc0
->bufctx_cp
, NVC0_BIND_CP_QUERY
);
1010 /* re-activate other counters */
1011 PUSH_SPACE(push
, 16);
1013 for (c
= 0; c
< 8; ++c
) {
1015 q
= nvc0_query(screen
->pm
.mp_counter
[c
]);
1018 cfg
= &nve4_mp_pm_queries
[q
->type
- PIPE_QUERY_DRIVER_SPECIFIC
];
1019 for (i
= 0; i
< cfg
->num_counters
; ++i
) {
1020 if (mask
& (1 << q
->ctr
[i
]))
1022 mask
|= 1 << q
->ctr
[i
];
1023 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_FUNC(q
->ctr
[i
])), 1);
1024 PUSH_DATA (push
, (cfg
->ctr
[i
].func
<< 4) | cfg
->ctr
[i
].mode
);
1029 /* Metric calculations:
1030 * sum(x) ... sum of x over all MPs
1031 * avg(x) ... average of x over all MPs
1033 * IPC : sum(inst_executed) / clock
1034 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
1035 * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
1036 * MP_EFFICIENCY : avg(active_cycles / clock)
1038 * NOTE: Interpretation of IPC requires knowledge of MP count.
1041 nve4_mp_pm_query_result(struct nvc0_context
*nvc0
, struct nvc0_query
*q
,
1042 void *result
, boolean wait
)
1044 uint32_t count
[32][4];
1046 unsigned mp_count
= MIN2(nvc0
->screen
->mp_count_compute
, 32);
1048 const struct nve4_mp_pm_query_cfg
*cfg
;
1050 cfg
= &nve4_mp_pm_queries
[q
->type
- PIPE_QUERY_DRIVER_SPECIFIC
];
1052 for (p
= 0; p
< mp_count
; ++p
) {
1053 const unsigned b
= (0x60 / 4) * p
;
1055 for (c
= 0; c
< cfg
->num_counters
; ++c
) {
1057 for (d
= 0; d
< ((q
->ctr
[c
] & ~3) ? 1 : 4); ++d
) {
1058 if (q
->data
[b
+ 20 + d
] != q
->sequence
) {
1061 if (nouveau_bo_wait(q
->bo
, NOUVEAU_BO_RD
, nvc0
->base
.client
))
1064 if (q
->ctr
[c
] & ~0x3)
1065 count
[p
][c
] = q
->data
[b
+ 16 + (q
->ctr
[c
] & 3)];
1067 count
[p
][c
] += q
->data
[b
+ d
* 4 + q
->ctr
[c
]];
1072 if (cfg
->op
== NVE4_COUNTER_OPn_SUM
) {
1073 for (c
= 0; c
< cfg
->num_counters
; ++c
)
1074 for (p
= 0; p
< mp_count
; ++p
)
1075 value
+= count
[p
][c
];
1076 value
= (value
* cfg
->norm
[0]) / cfg
->norm
[1];
1078 if (cfg
->op
== NVE4_COUNTER_OPn_OR
) {
1080 for (c
= 0; c
< cfg
->num_counters
; ++c
)
1081 for (p
= 0; p
< mp_count
; ++p
)
1083 value
= (v
* cfg
->norm
[0]) / cfg
->norm
[1];
1085 if (cfg
->op
== NVE4_COUNTER_OPn_AND
) {
1087 for (c
= 0; c
< cfg
->num_counters
; ++c
)
1088 for (p
= 0; p
< mp_count
; ++p
)
1090 value
= (v
* cfg
->norm
[0]) / cfg
->norm
[1];
1092 if (cfg
->op
== NVE4_COUNTER_OP2_REL_SUM_MM
) {
1093 uint64_t v
[2] = { 0, 0 };
1094 for (p
= 0; p
< mp_count
; ++p
) {
1095 v
[0] += count
[p
][0];
1096 v
[1] += count
[p
][1];
1099 value
= ((v
[0] - v
[1]) * cfg
->norm
[0]) / (v
[0] * cfg
->norm
[1]);
1101 if (cfg
->op
== NVE4_COUNTER_OP2_DIV_SUM_M0
) {
1102 for (p
= 0; p
< mp_count
; ++p
)
1103 value
+= count
[p
][0];
1105 value
= (value
* cfg
->norm
[0]) / (count
[0][1] * cfg
->norm
[1]);
1109 if (cfg
->op
== NVE4_COUNTER_OP2_AVG_DIV_MM
) {
1110 unsigned mp_used
= 0;
1111 for (p
= 0; p
< mp_count
; ++p
, mp_used
+= !!count
[p
][0])
1113 value
+= (count
[p
][0] * cfg
->norm
[0]) / count
[p
][1];
1115 value
/= mp_used
* cfg
->norm
[1];
1117 if (cfg
->op
== NVE4_COUNTER_OP2_AVG_DIV_M0
) {
1118 unsigned mp_used
= 0;
1119 for (p
= 0; p
< mp_count
; ++p
, mp_used
+= !!count
[p
][0])
1120 value
+= count
[p
][0];
1121 if (count
[0][1] && mp_used
) {
1122 value
*= cfg
->norm
[0];
1123 value
/= count
[0][1] * mp_used
* cfg
->norm
[1];
1129 *(uint64_t *)result
= value
;
1134 nvc0_screen_get_driver_query_info(struct pipe_screen
*pscreen
,
1136 struct pipe_driver_query_info
*info
)
1138 struct nvc0_screen
*screen
= nvc0_screen(pscreen
);
1141 count
+= NVC0_QUERY_DRV_STAT_COUNT
;
1143 if (screen
->base
.class_3d
>= NVE4_3D_CLASS
) {
1144 if (screen
->base
.device
->drm_version
>= 0x01000101)
1145 count
+= NVE4_PM_QUERY_COUNT
;
1150 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
1151 if (id
< NVC0_QUERY_DRV_STAT_COUNT
) {
1152 info
->name
= nvc0_drv_stat_names
[id
];
1153 info
->query_type
= NVC0_QUERY_DRV_STAT(id
);
1154 info
->max_value
= ~0ULL;
1155 info
->uses_byte_units
= !!strstr(info
->name
, "bytes");
1160 info
->name
= nve4_pm_query_names
[id
- NVC0_QUERY_DRV_STAT_COUNT
];
1161 info
->query_type
= NVE4_PM_QUERY(id
- NVC0_QUERY_DRV_STAT_COUNT
);
1162 info
->max_value
= (id
< NVE4_PM_QUERY_METRIC_MP_OCCUPANCY
) ?
1164 info
->uses_byte_units
= FALSE
;
1167 /* user asked for info about non-existing query */
1168 info
->name
= "this_is_not_the_query_you_are_looking_for";
1169 info
->query_type
= 0xdeadd01d;
1170 info
->max_value
= 0;
1171 info
->uses_byte_units
= FALSE
;
1176 nvc0_init_query_functions(struct nvc0_context
*nvc0
)
1178 struct pipe_context
*pipe
= &nvc0
->base
.pipe
;
1180 pipe
->create_query
= nvc0_query_create
;
1181 pipe
->destroy_query
= nvc0_query_destroy
;
1182 pipe
->begin_query
= nvc0_query_begin
;
1183 pipe
->end_query
= nvc0_query_end
;
1184 pipe
->get_query_result
= nvc0_query_result
;
1185 pipe
->render_condition
= nvc0_render_condition
;