/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Expose V3D HW perf counters.
 *
 * We also have code to fake support for occlusion queries.
 * Since we expose support for GL 2.0, we have to expose occlusion queries,
 * but the spec allows you to expose 0 query counter bits, so we just return 0
 * as the result of all our queries.
 */
32 #include "vc4_context.h"
/* Per-query state.  A query created from HW (driver-specific) query types
 * carries a perfmon describing the counters to sample; pure SW queries
 * (our faked occlusion queries) leave hwperfmon NULL.
 */
struct vc4_query
{
        /* Number of query types this (batch) query was created with; also
         * the number of events programmed in hwperfmon.
         */
        unsigned num_queries;
        struct vc4_hwperfmon *hwperfmon;
};
/* Names of the V3D HW performance counters.  The index into this table is
 * the HW event ID handed to the kernel perfmon interface (see
 * vc4_create_batch_query/vc4_get_driver_query_info), so the order must
 * match the HW event numbering and entries must never be reordered.
 */
static const char *v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};
73 int vc4_get_driver_query_group_info(struct pipe_screen
*pscreen
,
75 struct pipe_driver_query_group_info
*info
)
77 struct vc4_screen
*screen
= vc4_screen(pscreen
);
79 if (!screen
->has_perfmon_ioctl
)
88 info
->name
= "V3D counters";
89 info
->max_active_queries
= DRM_VC4_MAX_PERF_COUNTERS
;
90 info
->num_queries
= ARRAY_SIZE(v3d_counter_names
);
94 int vc4_get_driver_query_info(struct pipe_screen
*pscreen
, unsigned index
,
95 struct pipe_driver_query_info
*info
)
97 struct vc4_screen
*screen
= vc4_screen(pscreen
);
99 if (!screen
->has_perfmon_ioctl
)
103 return ARRAY_SIZE(v3d_counter_names
);
105 if (index
>= ARRAY_SIZE(v3d_counter_names
))
109 info
->name
= v3d_counter_names
[index
];
110 info
->query_type
= PIPE_QUERY_DRIVER_SPECIFIC
+ index
;
111 info
->result_type
= PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE
;
112 info
->type
= PIPE_DRIVER_QUERY_TYPE_UINT64
;
113 info
->flags
= PIPE_DRIVER_QUERY_FLAG_BATCH
;
117 static struct pipe_query
*
118 vc4_create_batch_query(struct pipe_context
*pctx
, unsigned num_queries
,
119 unsigned *query_types
)
121 struct vc4_query
*query
= calloc(1, sizeof(*query
));
122 struct vc4_hwperfmon
*hwperfmon
;
123 unsigned i
, nhwqueries
= 0;
128 for (i
= 0; i
< num_queries
; i
++) {
129 if (query_types
[i
] >= PIPE_QUERY_DRIVER_SPECIFIC
)
133 /* We can't mix HW and non-HW queries. */
134 if (nhwqueries
&& nhwqueries
!= num_queries
)
138 return (struct pipe_query
*)query
;
140 hwperfmon
= calloc(1, sizeof(*hwperfmon
));
144 for (i
= 0; i
< num_queries
; i
++)
145 hwperfmon
->events
[i
] = query_types
[i
] -
146 PIPE_QUERY_DRIVER_SPECIFIC
;
148 query
->hwperfmon
= hwperfmon
;
149 query
->num_queries
= num_queries
;
151 /* Note that struct pipe_query isn't actually defined anywhere. */
152 return (struct pipe_query
*)query
;
/**
 * Creates a single-type query, implemented as a batch query of one entry.
 *
 * The index parameter (query group index) is unused: we expose only one
 * group.
 */
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        return vc4_create_batch_query(ctx, 1, &query_type);
}
167 vc4_destroy_query(struct pipe_context
*pctx
, struct pipe_query
*pquery
)
169 struct vc4_context
*ctx
= vc4_context(pctx
);
170 struct vc4_query
*query
= (struct vc4_query
*)pquery
;
172 if (query
->hwperfmon
&& query
->hwperfmon
->id
) {
173 if (query
->hwperfmon
->id
) {
174 struct drm_vc4_perfmon_destroy req
= { };
176 req
.id
= query
->hwperfmon
->id
;
177 vc4_ioctl(ctx
->fd
, DRM_IOCTL_VC4_PERFMON_DESTROY
,
181 free(query
->hwperfmon
);
188 vc4_begin_query(struct pipe_context
*pctx
, struct pipe_query
*pquery
)
190 struct vc4_query
*query
= (struct vc4_query
*)pquery
;
191 struct vc4_context
*ctx
= vc4_context(pctx
);
192 struct drm_vc4_perfmon_create req
= { };
196 if (!query
->hwperfmon
)
199 /* Only one perfmon can be activated per context. */
203 /* Reset the counters by destroying the previously allocated perfmon */
204 if (query
->hwperfmon
->id
) {
205 struct drm_vc4_perfmon_destroy destroyreq
= { };
207 destroyreq
.id
= query
->hwperfmon
->id
;
208 vc4_ioctl(ctx
->fd
, DRM_IOCTL_VC4_PERFMON_DESTROY
, &destroyreq
);
211 for (i
= 0; i
< query
->num_queries
; i
++)
212 req
.events
[i
] = query
->hwperfmon
->events
[i
];
214 req
.ncounters
= query
->num_queries
;
215 ret
= vc4_ioctl(ctx
->fd
, DRM_IOCTL_VC4_PERFMON_CREATE
, &req
);
219 query
->hwperfmon
->id
= req
.id
;
221 /* Make sure all pendings jobs are flushed before activating the
225 ctx
->perfmon
= query
->hwperfmon
;
230 vc4_end_query(struct pipe_context
*pctx
, struct pipe_query
*pquery
)
232 struct vc4_query
*query
= (struct vc4_query
*)pquery
;
233 struct vc4_context
*ctx
= vc4_context(pctx
);
235 if (!query
->hwperfmon
)
238 if (ctx
->perfmon
!= query
->hwperfmon
)
241 /* Make sure all pendings jobs are flushed before deactivating the
250 vc4_get_query_result(struct pipe_context
*pctx
, struct pipe_query
*pquery
,
251 boolean wait
, union pipe_query_result
*vresult
)
253 struct vc4_context
*ctx
= vc4_context(pctx
);
254 struct vc4_query
*query
= (struct vc4_query
*)pquery
;
255 struct drm_vc4_perfmon_get_values req
;
259 if (!query
->hwperfmon
) {
264 if (!vc4_wait_seqno(ctx
->screen
, query
->hwperfmon
->last_seqno
,
265 wait
? PIPE_TIMEOUT_INFINITE
: 0, "perfmon"))
268 req
.id
= query
->hwperfmon
->id
;
269 req
.values_ptr
= (uintptr_t)query
->hwperfmon
->counters
;
270 ret
= vc4_ioctl(ctx
->fd
, DRM_IOCTL_VC4_PERFMON_GET_VALUES
, &req
);
274 for (i
= 0; i
< query
->num_queries
; i
++)
275 vresult
->batch
[i
].u64
= query
->hwperfmon
->counters
[i
];
281 vc4_set_active_query_state(struct pipe_context
*pctx
, boolean enable
)
286 vc4_query_init(struct pipe_context
*pctx
)
288 pctx
->create_query
= vc4_create_query
;
289 pctx
->create_batch_query
= vc4_create_batch_query
;
290 pctx
->destroy_query
= vc4_destroy_query
;
291 pctx
->begin_query
= vc4_begin_query
;
292 pctx
->end_query
= vc4_end_query
;
293 pctx
->get_query_result
= vc4_get_query_result
;
294 pctx
->set_active_query_state
= vc4_set_active_query_state
;