2 * Copyright 2011 Nouveau Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * Authors: Christoph Bumiller
25 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
27 #include "nvc0_context.h"
28 #include "nouveau/nv_object.xml.h"
29 #include "nve4_compute.xml.h"
31 #define NVC0_QUERY_STATE_READY 0
32 #define NVC0_QUERY_STATE_ACTIVE 1
33 #define NVC0_QUERY_STATE_ENDED 2
34 #define NVC0_QUERY_STATE_FLUSHED 3
/* Fragment of struct nvc0_query: several field lines are missing from this
 * extraction (e.g. the data pointer, type, sequence, base, state fields are
 * referenced elsewhere in this file but not visible here). */
42 struct nouveau_bo
/* buffer object backing the query's result storage */
*bo
;
44 uint32_t offset
; /* base + i * rotate */
48 int nesting
; /* only used for occlusion queries */
49 struct nouveau_mm_allocation
/* sub-allocation within the GART memory manager for this query */
*mm
;
52 #define NVC0_QUERY_ALLOC_SPACE 256
/* Forward declarations for the NVE4 MP (multiprocessor) performance-counter
 * query path, implemented later in this file. */
54 static void nve4_mp_pm_query_begin(struct nvc0_context
*, struct nvc0_query
*);
55 static void nve4_mp_pm_query_end(struct nvc0_context
*, struct nvc0_query
*);
56 static boolean
nve4_mp_pm_query_result(struct nvc0_context
*,
57 struct nvc0_query
*, void *, boolean
);
/* Cast helper: a gallium pipe_query handle is backed by an nvc0_query. */
59 static INLINE
struct nvc0_query
*
60 nvc0_query(struct pipe_query
*pipe
)
62 return (struct nvc0_query
*)pipe
;
66 nvc0_query_allocate(struct nvc0_context
*nvc0
, struct nvc0_query
*q
, int size
)
68 struct nvc0_screen
*screen
= nvc0
->screen
;
72 nouveau_bo_ref(NULL
, &q
->bo
);
74 if (q
->state
== NVC0_QUERY_STATE_READY
)
75 nouveau_mm_free(q
->mm
);
77 nouveau_fence_work(screen
->base
.fence
.current
,
78 nouveau_mm_free_work
, q
->mm
);
82 q
->mm
= nouveau_mm_allocate(screen
->base
.mm_GART
, size
, &q
->bo
, &q
->base
);
87 ret
= nouveau_bo_map(q
->bo
, 0, screen
->base
.client
);
89 nvc0_query_allocate(nvc0
, q
, 0);
92 q
->data
= (uint32_t *)((uint8_t *)q
->bo
->map
+ q
->base
);
/* pipe_context::destroy_query implementation: release the query's buffer
 * storage (an allocation of size 0 frees it, see nvc0_query_allocate) and
 * then free the query struct itself. */
98 nvc0_query_destroy(struct pipe_context
*pipe
, struct pipe_query
*pq
)
100 nvc0_query_allocate(nvc0_context(pipe
), nvc0_query(pq
), 0);
101 FREE(nvc0_query(pq
));
104 static struct pipe_query
*
105 nvc0_query_create(struct pipe_context
*pipe
, unsigned type
)
107 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
108 struct nvc0_query
*q
;
109 unsigned space
= NVC0_QUERY_ALLOC_SPACE
;
111 q
= CALLOC_STRUCT(nvc0_query
);
116 case PIPE_QUERY_OCCLUSION_COUNTER
:
117 case PIPE_QUERY_OCCLUSION_PREDICATE
:
119 space
= NVC0_QUERY_ALLOC_SPACE
;
121 case PIPE_QUERY_PIPELINE_STATISTICS
:
125 case PIPE_QUERY_SO_STATISTICS
:
126 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
130 case PIPE_QUERY_TIME_ELAPSED
:
131 case PIPE_QUERY_TIMESTAMP
:
132 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
133 case PIPE_QUERY_GPU_FINISHED
:
134 case PIPE_QUERY_PRIMITIVES_GENERATED
:
135 case PIPE_QUERY_PRIMITIVES_EMITTED
:
138 case NVC0_QUERY_TFB_BUFFER_OFFSET
:
142 if (nvc0
->screen
->base
.class_3d
>= NVE4_3D_CLASS
&&
143 nvc0
->screen
->base
.device
->drm_version
>= 0x01000101) {
144 if (type
>= NVE4_PM_QUERY(0) &&
145 type
<= NVE4_PM_QUERY_MAX
) {
146 /* 8 counters per MP + clock */
147 space
= 12 * nvc0
->screen
->mp_count
* sizeof(uint32_t);
151 debug_printf("invalid query type: %u\n", type
);
155 if (!nvc0_query_allocate(nvc0
, q
, space
)) {
163 /* we advance before query_begin ! */
164 q
->offset
-= q
->rotate
;
165 q
->data
-= q
->rotate
/ sizeof(*q
->data
);
168 q
->data
[0] = 0; /* initialize sequence */
170 return (struct pipe_query
*)q
;
/* Emit a NVC0_3D QUERY command: ask the GPU to write a result record at
 * (q->bo->offset + offset), tagged with q->sequence.  The 'get' word selects
 * which counter/report type the hardware writes. */
174 nvc0_query_get(struct nouveau_pushbuf
*push
, struct nvc0_query
*q
,
175 unsigned offset
, uint32_t get
)
/* reference the BO for write so the kernel keeps it resident during the push */
180 PUSH_REFN (push
, q
->bo
, NOUVEAU_BO_GART
| NOUVEAU_BO_WR
);
181 BEGIN_NVC0(push
, NVC0_3D(QUERY_ADDRESS_HIGH
), 4);
182 PUSH_DATAh(push
, q
->bo
->offset
+ offset
);
183 PUSH_DATA (push
, q
->bo
->offset
+ offset
);
184 PUSH_DATA (push
, q
->sequence
);
185 PUSH_DATA (push
, get
);
/* Advance the query to its next result slot within the allocation; when the
 * rotating region is exhausted, grab a fresh NVC0_QUERY_ALLOC_SPACE chunk. */
189 nvc0_query_rotate(struct nvc0_context
*nvc0
, struct nvc0_query
*q
)
191 q
->offset
+= q
->rotate
;
192 q
->data
+= q
->rotate
/ sizeof(*q
->data
);
193 if (q
->offset
- q
->base
== NVC0_QUERY_ALLOC_SPACE
)
194 nvc0_query_allocate(nvc0
, q
, NVC0_QUERY_ALLOC_SPACE
);
198 nvc0_query_begin(struct pipe_context
*pipe
, struct pipe_query
*pq
)
200 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
201 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
202 struct nvc0_query
*q
= nvc0_query(pq
);
204 /* For occlusion queries we have to change the storage, because a previous
205 * query might set the initial render condition to FALSE even *after* we re-
206 * initialized it to TRUE.
209 nvc0_query_rotate(nvc0
, q
);
211 /* XXX: can we do this with the GPU, and sync with respect to a previous
214 q
->data
[0] = q
->sequence
; /* initialize sequence */
215 q
->data
[1] = 1; /* initial render condition = TRUE */
216 q
->data
[4] = q
->sequence
+ 1; /* for comparison COND_MODE */
222 case PIPE_QUERY_OCCLUSION_COUNTER
:
223 case PIPE_QUERY_OCCLUSION_PREDICATE
:
224 q
->nesting
= nvc0
->screen
->num_occlusion_queries_active
++;
226 nvc0_query_get(push
, q
, 0x10, 0x0100f002);
229 BEGIN_NVC0(push
, NVC0_3D(COUNTER_RESET
), 1);
230 PUSH_DATA (push
, NVC0_3D_COUNTER_RESET_SAMPLECNT
);
231 IMMED_NVC0(push
, NVC0_3D(SAMPLECNT_ENABLE
), 1);
234 case PIPE_QUERY_PRIMITIVES_GENERATED
:
235 nvc0_query_get(push
, q
, 0x10, 0x06805002 | (q
->index
<< 5));
237 case PIPE_QUERY_PRIMITIVES_EMITTED
:
238 nvc0_query_get(push
, q
, 0x10, 0x05805002 | (q
->index
<< 5));
240 case PIPE_QUERY_SO_STATISTICS
:
241 nvc0_query_get(push
, q
, 0x20, 0x05805002 | (q
->index
<< 5));
242 nvc0_query_get(push
, q
, 0x30, 0x06805002 | (q
->index
<< 5));
244 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
245 nvc0_query_get(push
, q
, 0x10, 0x03005002 | (q
->index
<< 5));
247 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
248 case PIPE_QUERY_TIME_ELAPSED
:
249 nvc0_query_get(push
, q
, 0x10, 0x00005002);
251 case PIPE_QUERY_PIPELINE_STATISTICS
:
252 nvc0_query_get(push
, q
, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
253 nvc0_query_get(push
, q
, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
254 nvc0_query_get(push
, q
, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
255 nvc0_query_get(push
, q
, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
256 nvc0_query_get(push
, q
, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
257 nvc0_query_get(push
, q
, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
258 nvc0_query_get(push
, q
, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
259 nvc0_query_get(push
, q
, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
260 nvc0_query_get(push
, q
, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
261 nvc0_query_get(push
, q
, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
264 if (q
->type
>= NVE4_PM_QUERY(0) && q
->type
<= NVE4_PM_QUERY_MAX
)
265 nve4_mp_pm_query_begin(nvc0
, q
);
268 q
->state
= NVC0_QUERY_STATE_ACTIVE
;
272 nvc0_query_end(struct pipe_context
*pipe
, struct pipe_query
*pq
)
274 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
275 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
276 struct nvc0_query
*q
= nvc0_query(pq
);
278 if (q
->state
!= NVC0_QUERY_STATE_ACTIVE
) {
279 /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
281 nvc0_query_rotate(nvc0
, q
);
284 q
->state
= NVC0_QUERY_STATE_ENDED
;
287 case PIPE_QUERY_OCCLUSION_COUNTER
:
288 case PIPE_QUERY_OCCLUSION_PREDICATE
:
289 nvc0_query_get(push
, q
, 0, 0x0100f002);
290 if (--nvc0
->screen
->num_occlusion_queries_active
== 0) {
292 IMMED_NVC0(push
, NVC0_3D(SAMPLECNT_ENABLE
), 0);
295 case PIPE_QUERY_PRIMITIVES_GENERATED
:
296 nvc0_query_get(push
, q
, 0, 0x06805002 | (q
->index
<< 5));
298 case PIPE_QUERY_PRIMITIVES_EMITTED
:
299 nvc0_query_get(push
, q
, 0, 0x05805002 | (q
->index
<< 5));
301 case PIPE_QUERY_SO_STATISTICS
:
302 nvc0_query_get(push
, q
, 0x00, 0x05805002 | (q
->index
<< 5));
303 nvc0_query_get(push
, q
, 0x10, 0x06805002 | (q
->index
<< 5));
305 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
306 /* TODO: How do we sum over all streams for render condition ? */
307 /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
308 nvc0_query_get(push
, q
, 0x00, 0x03005002 | (q
->index
<< 5));
309 nvc0_query_get(push
, q
, 0x20, 0x00005002);
311 case PIPE_QUERY_TIMESTAMP
:
312 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
313 case PIPE_QUERY_TIME_ELAPSED
:
314 nvc0_query_get(push
, q
, 0, 0x00005002);
316 case PIPE_QUERY_GPU_FINISHED
:
317 nvc0_query_get(push
, q
, 0, 0x1000f010);
319 case PIPE_QUERY_PIPELINE_STATISTICS
:
320 nvc0_query_get(push
, q
, 0x00, 0x00801002); /* VFETCH, VERTICES */
321 nvc0_query_get(push
, q
, 0x10, 0x01801002); /* VFETCH, PRIMS */
322 nvc0_query_get(push
, q
, 0x20, 0x02802002); /* VP, LAUNCHES */
323 nvc0_query_get(push
, q
, 0x30, 0x03806002); /* GP, LAUNCHES */
324 nvc0_query_get(push
, q
, 0x40, 0x04806002); /* GP, PRIMS_OUT */
325 nvc0_query_get(push
, q
, 0x50, 0x07804002); /* RAST, PRIMS_IN */
326 nvc0_query_get(push
, q
, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
327 nvc0_query_get(push
, q
, 0x70, 0x0980a002); /* ROP, PIXELS */
328 nvc0_query_get(push
, q
, 0x80, 0x0d808002); /* TCP, LAUNCHES */
329 nvc0_query_get(push
, q
, 0x90, 0x0e809002); /* TEP, LAUNCHES */
331 case NVC0_QUERY_TFB_BUFFER_OFFSET
:
332 /* indexed by TFB buffer instead of by vertex stream */
333 nvc0_query_get(push
, q
, 0x00, 0x0d005002 | (q
->index
<< 5));
336 if (q
->type
>= NVE4_PM_QUERY(0) && q
->type
<= NVE4_PM_QUERY_MAX
)
337 nve4_mp_pm_query_end(nvc0
, q
);
/* Poll whether the query result has landed in memory and mark it READY.
 * NOTE(review): the branch structure around the two checks is missing from
 * this extraction — presumably one path is taken for queries whose readiness
 * is detected by a non-blocking map and the other compares the stored
 * sequence number; confirm against the full source. */
343 nvc0_query_update(struct nouveau_client
*cli
, struct nvc0_query
*q
)
/* non-blocking read map: success implies the GPU is done with the BO */
346 if (!nouveau_bo_map(q
->bo
, NOUVEAU_BO_RD
| NOUVEAU_BO_NOBLOCK
, cli
))
347 q
->state
= NVC0_QUERY_STATE_READY
;
/* sequence word written by the GPU matches the expected value */
349 if (q
->data
[0] == q
->sequence
)
350 q
->state
= NVC0_QUERY_STATE_READY
;
355 nvc0_query_result(struct pipe_context
*pipe
, struct pipe_query
*pq
,
356 boolean wait
, union pipe_query_result
*result
)
358 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
359 struct nvc0_query
*q
= nvc0_query(pq
);
360 uint64_t *res64
= (uint64_t*)result
;
361 uint32_t *res32
= (uint32_t*)result
;
362 boolean
*res8
= (boolean
*)result
;
363 uint64_t *data64
= (uint64_t *)q
->data
;
366 if (q
->type
>= NVE4_PM_QUERY(0) && q
->type
<= NVE4_PM_QUERY_MAX
)
367 return nve4_mp_pm_query_result(nvc0
, q
, result
, wait
);
369 if (q
->state
!= NVC0_QUERY_STATE_READY
)
370 nvc0_query_update(nvc0
->screen
->base
.client
, q
);
372 if (q
->state
!= NVC0_QUERY_STATE_READY
) {
374 if (q
->state
!= NVC0_QUERY_STATE_FLUSHED
) {
375 q
->state
= NVC0_QUERY_STATE_FLUSHED
;
376 /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
377 PUSH_KICK(nvc0
->base
.pushbuf
);
381 if (nouveau_bo_wait(q
->bo
, NOUVEAU_BO_RD
, nvc0
->screen
->base
.client
))
384 q
->state
= NVC0_QUERY_STATE_READY
;
387 case PIPE_QUERY_GPU_FINISHED
:
390 case PIPE_QUERY_OCCLUSION_COUNTER
: /* u32 sequence, u32 count, u64 time */
391 res64
[0] = q
->data
[1] - q
->data
[5];
393 case PIPE_QUERY_OCCLUSION_PREDICATE
:
394 res8
[0] = q
->data
[1] != q
->data
[5];
396 case PIPE_QUERY_PRIMITIVES_GENERATED
: /* u64 count, u64 time */
397 case PIPE_QUERY_PRIMITIVES_EMITTED
: /* u64 count, u64 time */
398 res64
[0] = data64
[0] - data64
[2];
400 case PIPE_QUERY_SO_STATISTICS
:
401 res64
[0] = data64
[0] - data64
[4];
402 res64
[1] = data64
[2] - data64
[6];
404 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
405 res8
[0] = data64
[0] != data64
[2];
407 case PIPE_QUERY_TIMESTAMP
:
408 res64
[0] = data64
[1];
410 case PIPE_QUERY_TIMESTAMP_DISJOINT
: /* u32 sequence, u32 0, u64 time */
411 res64
[0] = 1000000000;
412 res8
[8] = (data64
[1] == data64
[3]) ? FALSE
: TRUE
;
414 case PIPE_QUERY_TIME_ELAPSED
:
415 res64
[0] = data64
[1] - data64
[3];
417 case PIPE_QUERY_PIPELINE_STATISTICS
:
418 for (i
= 0; i
< 10; ++i
)
419 res64
[i
] = data64
[i
* 2] - data64
[24 + i
* 2];
421 case NVC0_QUERY_TFB_BUFFER_OFFSET
:
422 res32
[0] = q
->data
[1];
425 assert(0); /* can't happen, we don't create queries with invalid type */
/* Make the GPU FIFO wait (semaphore ACQUIRE_EQUAL) until the query's
 * sequence number has been written, i.e. until the query result is ready,
 * without stalling the CPU. */
433 nvc0_query_fifo_wait(struct nouveau_pushbuf
*push
, struct pipe_query
*pq
)
435 struct nvc0_query
*q
= nvc0_query(pq
)
;
436 unsigned offset
= q
->offset
;
/* SO overflow predicates sync on the extra ZERO query written at +0x20
 * (see nvc0_query_end), since PRIMS_DROPPED does not write a sequence */
438 if (q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
) offset
+= 0x20;
441 PUSH_REFN (push
, q
->bo
, NOUVEAU_BO_GART
| NOUVEAU_BO_RD
);
442 BEGIN_NVC0(push
, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH
), 4);
443 PUSH_DATAh(push
, q
->bo
->offset
+ offset
);
444 PUSH_DATA (push
, q
->bo
->offset
+ offset
);
445 PUSH_DATA (push
, q
->sequence
);
446 PUSH_DATA (push
, (1 << 12) |
447 NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL
);
451 nvc0_render_condition(struct pipe_context
*pipe
,
452 struct pipe_query
*pq
, uint mode
)
454 struct nvc0_context
*nvc0
= nvc0_context(pipe
);
455 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
456 struct nvc0_query
*q
;
458 boolean negated
= FALSE
;
460 mode
!= PIPE_RENDER_COND_NO_WAIT
&&
461 mode
!= PIPE_RENDER_COND_BY_REGION_NO_WAIT
;
463 nvc0
->cond_query
= pq
;
464 nvc0
->cond_mode
= mode
;
468 IMMED_NVC0(push
, NVC0_3D(COND_MODE
), NVC0_3D_COND_MODE_ALWAYS
);
473 /* NOTE: comparison of 2 queries only works if both have completed */
475 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
476 cond
= negated
? NVC0_3D_COND_MODE_EQUAL
:
477 NVC0_3D_COND_MODE_NOT_EQUAL
;
480 case PIPE_QUERY_OCCLUSION_COUNTER
:
481 case PIPE_QUERY_OCCLUSION_PREDICATE
:
482 if (likely(!negated
)) {
483 if (unlikely(q
->nesting
))
484 cond
= wait
? NVC0_3D_COND_MODE_NOT_EQUAL
:
485 NVC0_3D_COND_MODE_ALWAYS
;
487 cond
= NVC0_3D_COND_MODE_RES_NON_ZERO
;
489 cond
= wait
? NVC0_3D_COND_MODE_EQUAL
: NVC0_3D_COND_MODE_ALWAYS
;
493 assert(!"render condition query not a predicate");
494 mode
= NVC0_3D_COND_MODE_ALWAYS
;
499 nvc0_query_fifo_wait(push
, pq
);
502 PUSH_REFN (push
, q
->bo
, NOUVEAU_BO_GART
| NOUVEAU_BO_RD
);
503 BEGIN_NVC0(push
, NVC0_3D(COND_ADDRESS_HIGH
), 3);
504 PUSH_DATAh(push
, q
->bo
->offset
+ q
->offset
);
505 PUSH_DATA (push
, q
->bo
->offset
+ q
->offset
);
506 PUSH_DATA (push
, cond
);
/* Submit the query's result word directly as push-buffer data (used e.g. to
 * feed a saved transform-feedback offset back to the GPU).  The entry is
 * marked NO_PREFETCH so the GPU reads it only when it gets there, after the
 * value has been written. */
510 nvc0_query_pushbuf_submit(struct nouveau_pushbuf
*push
,
511 struct pipe_query
*pq
, unsigned result_offset
)
513 struct nvc0_query
*q
= nvc0_query(pq
);
515 #define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
517 nouveau_pushbuf_space(push
, 0, 0, 1);
/* 4 bytes of data sourced from the query BO at offset + result_offset */
518 nouveau_pushbuf_data(push
, q
->bo
, q
->offset
+ result_offset
, 4 |
519 NVC0_IB_ENTRY_1_NO_PREFETCH
);
/* Record the current buffer offset of a stream-output target by ending its
 * TFB_BUFFER_OFFSET query; optionally emits a SERIALIZE first (when the
 * caller requests serialization via *serialize). */
523 nvc0_so_target_save_offset(struct pipe_context
*pipe
,
524 struct pipe_stream_output_target
*ptarg
,
525 unsigned index
, boolean
*serialize
)
527 struct nvc0_so_target
*targ
= nvc0_so_target(ptarg
);
531 PUSH_SPACE(nvc0_context(pipe
)->base
.pushbuf
, 1);
532 IMMED_NVC0(nvc0_context(pipe
)->base
.pushbuf
, NVC0_3D(SERIALIZE
), 0);
/* the query is indexed by TFB buffer, not by vertex stream */
535 nvc0_query(targ
->pq
)->index
= index
;
537 nvc0_query_end(pipe
, targ
->pq
);
541 /* === PERFORMANCE MONITORING COUNTERS === */
543 /* Code to read out MP counters: They are accessible via mmio, too, but let's
544 * just avoid mapping registers in userspace. We'd have to know which MPs are
545 * enabled/present, too, and that information is not presently exposed.
546 * We could add a kernel interface for it, but reading the counters like this
547 * has the advantage of being async (if get_result isn't called immediately).
549 static const uint64_t nve4_read_mp_pm_counters_code
[] =
551 0x2042004270420047ULL
, /* sched */
552 0x2800400000001de4ULL
, /* mov b32 $r0 c0[0] (04) */
553 0x2c0000000c009c04ULL
, /* mov b32 $r2 $physid (20) */
554 0x2800400010005de4ULL
, /* mov b32 $r1 c0[4] (04) */
555 0x2c0000008400dc04ULL
, /* mov b32 $r3 $tidx (27) */
556 0x7000c01050209c03ULL
, /* ext u32 $r2 $r2 0x0414 (04) */
557 0x2c00000010011c04ULL
, /* mov b32 $r4 $pm0 (20) */
558 0x190e0000fc33dc03ULL
, /* set $p1 eq u32 $r3 0 (04) */
559 0x2280428042804277ULL
, /* sched */
560 0x2c00000014015c04ULL
, /* mov b32 $r5 $pm1 (27) */
561 0x10000000c0209c02ULL
, /* mul $r2 u32 $r2 u32 48 (04) */
562 0x2c00000018019c04ULL
, /* mov b32 $r6 $pm2 (28) */
563 0x4801000008001c03ULL
, /* add b32 ($r0 $c) $r0 $r2 (04) */
564 0x2c0000001c01dc04ULL
, /* mov b32 $r7 $pm3 (28) */
565 0x0800000000105c42ULL
, /* add b32 $r1 $r1 0 $c (04) */
566 0x2c00000140009c04ULL
, /* mov b32 $r2 $clock (28) */
567 0x2042804200420047ULL
, /* sched */
568 0x94000000000107c5ULL
, /* $p1 st b128 wt g[$r0d] $r4q (04) */
569 0x2c00000020011c04ULL
, /* mov b32 $r4 $pm4 (20) */
570 0x2c00000024015c04ULL
, /* mov b32 $r5 $pm5 (04) */
571 0x2c00000028019c04ULL
, /* mov b32 $r6 $pm6 (20) */
572 0x2c0000002c01dc04ULL
, /* mov b32 $r7 $pm7 (04) */
573 0x2c0000014400dc04ULL
, /* mov b32 $r3 $clockhi (28) */
574 0x94000000400107c5ULL
, /* $p1 st b128 wt g[$r0d+16] $r4q (04) */
575 0x200002e042804207ULL
, /* sched */
576 0x2800400020011de4ULL
, /* mov b32 $r4 c0[8] (20) */
577 0x2c0000000c015c04ULL
, /* mov b32 $r5 $physid (04) */
578 0x94000000800087a5ULL
, /* $p1 st b64 wt g[$r0d+32] $r2d (28) */
579 0x94000000a00107a5ULL
, /* $p1 st b64 wt g[$r0d+40] $r4d (04) */
580 0x8000000000001de7ULL
/* exit (2e) */
583 /* NOTE: intentionally using the same names as NV */
584 static const char *nve4_pm_query_names
[] =
606 "l1_local_load_miss",
607 "l1_local_store_hit",
608 "l1_local_store_miss",
611 "l1_global_load_hit",
612 "l1_global_load_miss",
613 "uncached_global_load_transaction",
614 "global_store_transaction",
621 /* For simplicity, we will allocate as many group slots as we allocate counter
622 * slots. This means that a single counter which wants to source from 2 groups
623 * will have to be declared as using 2 counter slots. This shouldn't really be
624 * a problem because such queries don't make much sense ... (unless someone is
627 struct nve4_mp_counter_cfg
629 uint32_t func
: 16; /* mask or 4-bit logic op (depending on mode) */
630 uint32_t mode
: 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
632 uint32_t sig_dom
: 1; /* if 0, MP_PM_A, if 1, MP_PM_B */
633 uint32_t sig_sel
: 8; /* signal group */
634 uint32_t src_sel
: 32; /* signal selection for up to 5 sources */
637 struct nve4_mp_pm_query_cfg
639 struct nve4_mp_counter_cfg ctr
[4];
640 uint8_t num_counters
;
641 uint8_t op
; /* PIPE_LOGICOP_CLEAR(for ADD),OR,AND */
644 #define _Q1A(n, f, m, g, s) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, PIPE_LOGICOP_CLEAR }
645 #define _Q1B(n, f, m, g, s) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, PIPE_LOGICOP_CLEAR }
647 static const struct nve4_mp_pm_query_cfg nve4_mp_pm_queries
[] =
649 _Q1A(PROF_TRIGGER_0
, 0x0001, B6
, USER
, 0x00000000),
650 _Q1A(PROF_TRIGGER_1
, 0x0001, B6
, USER
, 0x00000004),
651 _Q1A(PROF_TRIGGER_2
, 0x0001, B6
, USER
, 0x00000008),
652 _Q1A(PROF_TRIGGER_3
, 0x0001, B6
, USER
, 0x0000000c),
653 _Q1A(PROF_TRIGGER_4
, 0x0001, B6
, USER
, 0x00000010),
654 _Q1A(PROF_TRIGGER_5
, 0x0001, B6
, USER
, 0x00000014),
655 _Q1A(PROF_TRIGGER_6
, 0x0001, B6
, USER
, 0x00000018),
656 _Q1A(PROF_TRIGGER_7
, 0x0001, B6
, USER
, 0x0000001c),
657 _Q1A(LAUNCHED_WARPS
, 0x0001, B6
, LAUNCH
, 0x00000004),
658 _Q1A(LAUNCHED_THREADS
, 0x003f, B6
, LAUNCH
, 0x398a4188),
659 _Q1B(LAUNCHED_CTA
, 0x0001, B6
, WARP
, 0x0000001c),
660 _Q1A(INST_ISSUED1
, 0x0001, B6
, ISSUE
, 0x00000004),
661 _Q1A(INST_ISSUED2
, 0x0001, B6
, ISSUE
, 0x00000008),
662 _Q1A(INST_EXECUTED
, 0x0003, B6
, EXEC
, 0x00000398),
663 _Q1A(LD_SHARED
, 0x0001, B6
, LDST
, 0x00000000),
664 _Q1A(ST_SHARED
, 0x0001, B6
, LDST
, 0x00000004),
665 _Q1A(LD_LOCAL
, 0x0001, B6
, LDST
, 0x00000008),
666 _Q1A(ST_LOCAL
, 0x0001, B6
, LDST
, 0x0000000c),
667 _Q1A(GLD_REQUEST
, 0x0001, B6
, LDST
, 0x00000010),
668 _Q1A(GST_REQUEST
, 0x0001, B6
, LDST
, 0x00000014),
669 _Q1B(L1_LOCAL_LOAD_HIT
, 0x0001, B6
, L1
, 0x00000000),
670 _Q1B(L1_LOCAL_LOAD_MISS
, 0x0001, B6
, L1
, 0x00000004),
671 _Q1B(L1_LOCAL_STORE_HIT
, 0x0001, B6
, L1
, 0x00000008),
672 _Q1B(L1_LOCAL_STORE_MISS
, 0x0001, B6
, L1
, 0x0000000c),
673 _Q1B(L1_GLOBAL_LOAD_HIT
, 0x0001, B6
, L1
, 0x00000010),
674 _Q1B(L1_GLOBAL_LOAD_MISS
, 0x0001, B6
, L1
, 0x00000014),
675 _Q1B(GLD_TRANSACTIONS_UNCACHED
, 0x0001, B6
, MEM
, 0x00000000),
676 _Q1B(GST_TRANSACTIONS
, 0x0001, B6
, MEM
, 0x00000004),
677 _Q1A(BRANCH
, 0x0001, B6
, BRANCH
, 0x0000000c),
678 _Q1A(BRANCH_DIVERGENT
, 0x0001, B6
, BRANCH
, 0x00000010),
679 _Q1B(ACTIVE_WARPS
, 0x003f, B6
, WARP
, 0x398a4188),
680 _Q1B(ACTIVE_CYCLES
, 0x0001, B6
, WARP
, 0x00000004)
687 nve4_mp_pm_query_begin(struct nvc0_context
*nvc0
, struct nvc0_query
*q
)
689 struct nvc0_screen
*screen
= nvc0
->screen
;
690 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
691 const struct nve4_mp_pm_query_cfg
*cfg
;
693 unsigned num_ab
[2] = { 0, 0 };
695 cfg
= &nve4_mp_pm_queries
[q
->type
- PIPE_QUERY_DRIVER_SPECIFIC
];
697 /* check if we have enough free counter slots */
698 for (i
= 0; i
< cfg
->num_counters
; ++i
)
699 num_ab
[cfg
->ctr
[i
].sig_dom
]++;
701 if (screen
->pm
.num_mp_pm_active
[0] + num_ab
[0] > 4 ||
702 screen
->pm
.num_mp_pm_active
[1] + num_ab
[1] > 4) {
703 NOUVEAU_ERR("Not enough free MP counter slots !\n");
707 assert(cfg
->num_counters
<= 4);
708 PUSH_SPACE(push
, 4 * 8 + 6);
710 if (!screen
->pm
.mp_counters_enabled
) {
711 screen
->pm
.mp_counters_enabled
= TRUE
;
712 BEGIN_NVC0(push
, SUBC_SW(0x06ac), 1);
713 PUSH_DATA (push
, 0x1fcb);
716 /* set sequence field to 0 (used to check if result is available) */
717 for (i
= 0; i
< screen
->mp_count
; ++i
)
718 q
->data
[i
* 10 + 10] = 0;
720 for (i
= 0; i
< cfg
->num_counters
; ++i
) {
721 const unsigned d
= cfg
->ctr
[i
].sig_dom
;
723 if (!screen
->pm
.num_mp_pm_active
[d
]) {
724 uint32_t m
= (1 << 22) | (1 << (7 + (8 * !d
)));
725 if (screen
->pm
.num_mp_pm_active
[!d
])
726 m
|= 1 << (7 + (8 * d
));
727 BEGIN_NVC0(push
, SUBC_SW(0x0600), 1);
730 screen
->pm
.num_mp_pm_active
[d
]++;
732 for (c
= d
* 4; c
< (d
* 4 + 4); ++c
) {
733 if (!screen
->pm
.mp_counter
[c
]) {
735 screen
->pm
.mp_counter
[c
] = (struct pipe_query
*)q
;
739 assert(c
<= (d
* 4 + 3)); /* must succeed, already checked for space */
741 /* configure and reset the counter(s) */
743 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_A_SIGSEL(c
& 3)), 1);
745 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_B_SIGSEL(c
& 3)), 1);
746 PUSH_DATA (push
, cfg
->ctr
[i
].sig_sel
);
747 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_SRCSEL(c
)), 1);
748 PUSH_DATA (push
, cfg
->ctr
[i
].src_sel
+ 0x2108421 * (c
& 3));
749 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_FUNC(c
)), 1);
750 PUSH_DATA (push
, (cfg
->ctr
[i
].func
<< 4) | cfg
->ctr
[i
].mode
);
751 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_SET(c
)), 1);
757 nve4_mp_pm_query_end(struct nvc0_context
*nvc0
, struct nvc0_query
*q
)
759 struct nvc0_screen
*screen
= nvc0
->screen
;
760 struct pipe_context
*pipe
= &nvc0
->base
.pipe
;
761 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
764 const uint block
[3] = { 32, 1, 1 };
765 const uint grid
[3] = { screen
->mp_count
, 1, 1 };
767 const struct nve4_mp_pm_query_cfg
*cfg
;
769 cfg
= &nve4_mp_pm_queries
[q
->type
- PIPE_QUERY_DRIVER_SPECIFIC
];
771 if (unlikely(!screen
->pm
.prog
)) {
772 struct nvc0_program
*prog
= CALLOC_STRUCT(nvc0_program
);
773 prog
->type
= PIPE_SHADER_COMPUTE
;
774 prog
->translated
= TRUE
;
776 prog
->code
= (uint32_t *)nve4_read_mp_pm_counters_code
;
777 prog
->code_size
= sizeof(nve4_read_mp_pm_counters_code
);
778 prog
->parm_size
= 12;
779 screen
->pm
.prog
= prog
;
782 /* disable all counting */
784 for (c
= 0; c
< 8; ++c
)
785 if (screen
->pm
.mp_counter
[c
])
786 IMMED_NVC0(push
, NVE4_COMPUTE(MP_PM_FUNC(c
)), 0);
787 /* release counters for this query */
788 for (c
= 0; c
< 8; ++c
) {
789 if (nvc0_query(screen
->pm
.mp_counter
[c
]) == q
) {
790 screen
->pm
.num_mp_pm_active
[c
/ 4]--;
791 screen
->pm
.mp_counter
[c
] = NULL
;
795 BCTX_REFN_bo(nvc0
->bufctx_cp
, CP_QUERY
, NOUVEAU_BO_GART
| NOUVEAU_BO_WR
,
798 pipe
->bind_compute_state(pipe
, screen
->pm
.prog
);
799 input
[0] = (q
->bo
->offset
+ q
->base
);
800 input
[1] = (q
->bo
->offset
+ q
->base
) >> 32;
801 input
[2] = q
->sequence
;
802 pipe
->launch_grid(pipe
, block
, grid
, 0, input
);
804 nouveau_bufctx_reset(nvc0
->bufctx_cp
, NVC0_BIND_CP_QUERY
);
806 /* re-activate other counters */
807 PUSH_SPACE(push
, 16);
809 for (c
= 0; c
< 8; ++c
) {
811 q
= nvc0_query(screen
->pm
.mp_counter
[c
]);
814 cfg
= &nve4_mp_pm_queries
[q
->type
- PIPE_QUERY_DRIVER_SPECIFIC
];
815 for (i
= 0; i
< cfg
->num_counters
; ++i
) {
816 if (mask
& (1 << q
->ctr
[i
]))
818 mask
|= 1 << q
->ctr
[i
];
819 BEGIN_NVC0(push
, NVE4_COMPUTE(MP_PM_FUNC(q
->ctr
[i
])), 1);
820 PUSH_DATA (push
, (cfg
->ctr
[i
].func
<< 4) | cfg
->ctr
[i
].mode
);
826 nve4_mp_pm_query_result(struct nvc0_context
*nvc0
, struct nvc0_query
*q
,
827 void *result
, boolean wait
)
832 const struct nve4_mp_pm_query_cfg
*cfg
;
834 cfg
= &nve4_mp_pm_queries
[q
->type
- PIPE_QUERY_DRIVER_SPECIFIC
];
836 for (p
= 0; p
< nvc0
->screen
->mp_count_compute
; ++p
) {
838 const unsigned b
= p
* 12;
840 clock
= *(uint64_t *)&q
->data
[b
+ 8];
841 (void)clock
; /* might be interesting one day */
843 if (q
->data
[b
+ 10] != q
->sequence
) {
844 /* WARNING: This will spin forever if you loop with wait == FALSE and
845 * the push buffer hasn't been flushed !
849 if (nouveau_bo_wait(q
->bo
, NOUVEAU_BO_RD
, nvc0
->base
.client
))
853 for (c
= 0; c
< cfg
->num_counters
; ++c
)
854 count
[c
] = q
->data
[b
+ q
->ctr
[c
]];
859 case PIPE_LOGICOP_AND
:
860 value
&= count
[0] & count
[1] & count
[2] & count
[3];
862 case PIPE_LOGICOP_OR
:
863 value
|= count
[0] | count
[1] | count
[2] | count
[3];
865 case PIPE_LOGICOP_CLEAR
: /* abused as ADD */
867 value
+= count
[0] + count
[1] + count
[2] + count
[3];
871 *(uint64_t *)result
= value
;
876 nvc0_screen_get_driver_query_info(struct pipe_screen
*pscreen
,
878 struct pipe_driver_query_info
*info
)
880 struct nvc0_screen
*screen
= nvc0_screen(pscreen
);
882 if (screen
->base
.class_3d
>= NVE4_3D_CLASS
) {
884 if (screen
->base
.device
->drm_version
>= 0x01000101)
885 count
= NVE4_PM_QUERY_COUNT
;
889 info
->name
= nve4_pm_query_names
[id
];
890 info
->query_type
= NVE4_PM_QUERY(id
);
891 info
->max_value
= ~0ULL;
892 info
->uses_byte_units
= FALSE
;
899 /* user asked for info about non-existing query */
900 info
->name
= "this_is_not_the_query_you_are_looking_for";
901 info
->query_type
= 0xdeadd01d;
903 info
->uses_byte_units
= FALSE
;
/* Hook this file's query entry points into the context's pipe_context
 * vtable at context-creation time. */
908 nvc0_init_query_functions(struct nvc0_context
*nvc0
)
910 struct pipe_context
*pipe
= &nvc0
->base
.pipe
;
912 pipe
->create_query
= nvc0_query_create
;
913 pipe
->destroy_query
= nvc0_query_destroy
;
914 pipe
->begin_query
= nvc0_query_begin
;
915 pipe
->end_query
= nvc0_query_end
;
916 pipe
->get_query_result
= nvc0_query_result
;
917 pipe
->render_condition
= nvc0_render_condition
;