2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Rob Clark <robclark@freedesktop.org>
27 /* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
29 #include "freedreno_query_acc.h"
30 #include "freedreno_resource.h"
32 #include "fd5_context.h"
33 #include "fd5_format.h"
34 #include "fd5_query.h"
/* Per-query (or, for batch queries, per-counter) sample slot in the
 * query's backing buffer.  The hw/CP writes the raw counter value into
 * 'start' on resume and 'stop' on pause, and CP_MEM_TO_MEM accumulates
 * (stop - start) into 'result' across batches.
 *
 * PACKED: layout is consumed by the GPU command processor, so the
 * compiler must not insert padding.
 *
 * NOTE(review): the field lines were lost in extraction; start/stop/
 * result as 64-bit values are implied by the query_sample() offsetof
 * usage and the 64b CP operations below — confirm against upstream.
 */
struct PACKED fd5_query_sample {
	uint64_t start;
	uint64_t stop;
	uint64_t result;
};
/* Expands to the (bo, offset, or, shift) argument list for OUT_RELOC,
 * addressing a single field of the idx'th element of an array of
 * fd5_query_sample in the query's backing resource.
 *
 * Fix: parenthesize the 'idx' argument so that expressions like
 * query_sample_idx(aq, i + 1, field) expand with the intended
 * precedence (macro-argument hygiene).
 */
#define query_sample_idx(aq, idx, field)        \
		fd_resource((aq)->prsc)->bo,                \
		((idx) * sizeof(struct fd5_query_sample)) + \
		offsetof(struct fd5_query_sample, field),   \
		0, 0

/* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field) \
		query_sample_idx(aq, 0, field)
56 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
/* Start (or restart) occlusion counting for this query: enable the
 * sample-count copy, point the hw at this query's sample.start slot,
 * and fire ZPASS_DONE so the current sample count gets written there.
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	/* destination address for the ZPASS_DONE sample-count write: */
	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOC(ring, query_sample(aq, start));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);

	/* Bump count of in-flight samples-passed queries; decremented in
	 * occlusion_pause().  NOTE(review): the consumer of this counter is
	 * not visible in this file — presumably state emit checks it.
	 */
	fd5_context(batch->ctx)->samples_passed_queries++;
}
/* Stop occlusion counting: snapshot the current sample count into
 * sample.stop and accumulate (stop - start) into sample.result.
 */
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	/* Seed sample.stop with a sentinel so the CP_WAIT_REG_MEM below can
	 * tell when the ZPASS_DONE write has actually landed:
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 4);
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);

	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	/* destination address for the ZPASS_DONE sample-count write: */
	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOC(ring, query_sample(aq, stop));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);

	/* Stall the CP until sample.stop no longer holds the sentinel, ie.
	 * until ZPASS_DONE has overwritten it.  The 0x14/0x10 magic is the
	 * undecoded function/poll encoding (see wiki link at top of file).
	 */
	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
	OUT_RING(ring, 0x00000014); // XXX
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0x00000010); // XXX

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOC(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));     /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));       /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));      /* srcC */

	/* matches the increment in occlusion_resume(): */
	fd5_context(batch->ctx)->samples_passed_queries--;
}
120 occlusion_counter_result(struct fd_acc_query
*aq
, void *buf
,
121 union pipe_query_result
*result
)
123 struct fd5_query_sample
*sp
= buf
;
124 result
->u64
= sp
->result
;
128 occlusion_predicate_result(struct fd_acc_query
*aq
, void *buf
,
129 union pipe_query_result
*result
)
131 struct fd5_query_sample
*sp
= buf
;
132 result
->b
= !!sp
->result
;
/* Provider hookup for PIPE_QUERY_OCCLUSION_COUNTER. */
static const struct fd_acc_sample_provider occlusion_counter = {
		.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_counter_result,
};
/* Provider hookup for PIPE_QUERY_OCCLUSION_PREDICATE; shares the
 * counting machinery with occlusion_counter, only the result
 * conversion differs.
 */
static const struct fd_acc_sample_provider occlusion_predicate = {
		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_predicate_result,
};
/* Conservative predicate uses the exact implementation (an exact
 * answer is always a valid conservative one).
 */
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_predicate_result,
};
/* Capture a start-of-span timestamp: RB_DONE_TS event with the
 * TIMESTAMP flag writes the rbbm always-on timer value into
 * sample.start.
 */
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOC(ring, query_sample(aq, start));
	OUT_RING(ring, 0x00000000);
}
/* Capture an end-of-span timestamp into sample.stop and accumulate
 * (stop - start) ticks into sample.result.
 */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0x00000000);

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOC(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));     /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));       /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));      /* srcC */
}
/* Convert always-on rbbm timer ticks to nanoseconds.
 *
 * This is based on the 19.2MHz always-on rbbm timer.
 *
 * Fix: widen to 64-bit before multiplying — 'ts * 52' performed in
 * 32-bit wraps for ts > ~82.6M ticks (only ~4.3 seconds of elapsed
 * time).  Note the integer ratio truncates 52.083.. to 52 (~0.16%
 * error), preserved here to keep results consistent.
 *
 * TODO we should probably query this value from kernel..
 */
static uint64_t
ticks_to_ns(uint32_t ts)
{
	return (uint64_t)ts * (1000000000 / 19200000);
}
212 time_elapsed_accumulate_result(struct fd_acc_query
*aq
, void *buf
,
213 union pipe_query_result
*result
)
215 struct fd5_query_sample
*sp
= buf
;
216 result
->u64
= ticks_to_ns(sp
->result
);
220 timestamp_accumulate_result(struct fd_acc_query
*aq
, void *buf
,
221 union pipe_query_result
*result
)
223 struct fd5_query_sample
*sp
= buf
;
224 result
->u64
= ticks_to_ns(sp
->result
);
/* Provider hookup for PIPE_QUERY_TIME_ELAPSED, accumulated across
 * batches via timestamp_resume/timestamp_pause.
 */
static const struct fd_acc_sample_provider time_elapsed = {
		.query_type = PIPE_QUERY_TIME_ELAPSED,
		.size = sizeof(struct fd5_query_sample),
		.resume = timestamp_resume,
		.pause = timestamp_pause,
		.result = time_elapsed_accumulate_result,
};
236 /* NOTE: timestamp query isn't going to give terribly sensible results
237 * on a tiler. But it is needed by qapitrace profile heatmap. If you
238 * add in a binning pass, the results get even more non-sensical. So
239 * we just return the timestamp on the first tile and hope that is
240 * kind of good enough.
/* Provider hookup for PIPE_QUERY_TIMESTAMP; see the note above about
 * tiler semantics.
 */
static const struct fd_acc_sample_provider timestamp = {
		.query_type = PIPE_QUERY_TIMESTAMP,
		.size = sizeof(struct fd5_query_sample),
		.resume = timestamp_resume,
		.pause = timestamp_pause,
		.result = timestamp_accumulate_result,
};
253 * Performance Counter (batch) queries:
 255 * Only one of these is active at a time, per design of the gallium
 256 * batch_query API. One perfcntr query tracks N query_types,
257 * each of which has a 'fd_batch_query_entry' that maps it back to
258 * the associated group and counter.
/* Maps one requested query_type of a batch query back to its
 * performance-counter group and countable.
 */
struct fd_batch_query_entry {
	uint8_t gid;      /* group-id */
	uint8_t cid;      /* countable-id within the group */
};
/* Per-batch-query state, hung off fd_acc_query::query_data; allocated
 * in fd5_create_batch_query() with a variable-length entries tail.
 */
struct fd_batch_query_data {
	struct fd_screen *screen;
	unsigned num_query_entries;
	struct fd_batch_query_entry query_entries[];
};
/* Program the selected countables into hw counter select registers and
 * snapshot the counters' start values into the sample buffer (one
 * fd5_query_sample slot per query entry).
 */
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	/* next free counter within each group (VLA sized by # of groups): */
	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* configure performance counters for the requested queries: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;

		/* per-group limit was validated in fd5_create_batch_query(): */
		debug_assert(counter_idx < g->num_counters);

		OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
		OUT_RING(ring, g->countables[entry->cid].selector);
	}

	/* restart the allocation so the second pass picks the same
	 * counter_idx per entry as the first:
	 */
	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* and snapshot the start values */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		/* 64b copy of the counter register pair into sample[i].start: */
		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
				CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOC(ring, query_sample_idx(aq, i, start));
	}
}
/* Snapshot the counters' end values and accumulate the per-entry
 * deltas into the sample buffer results.
 */
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	/* next free counter within each group, mirroring perfcntr_resume()
	 * so each entry maps to the same counter_idx:
	 */
	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* TODO do we need to bother to turn anything off? */

	/* snapshot the end values: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
				CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOC(ring, query_sample_idx(aq, i, stop));
	}

	/* and compute the result: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		/* result += stop - start: */
		OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
		OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
				CP_MEM_TO_MEM_0_NEG_C);
		OUT_RELOC(ring, query_sample_idx(aq, i, result));     /* dst */
		OUT_RELOC(ring, query_sample_idx(aq, i, result));     /* srcA */
		OUT_RELOC(ring, query_sample_idx(aq, i, stop));       /* srcB */
		OUT_RELOC(ring, query_sample_idx(aq, i, start));      /* srcC */
	}
}
353 perfcntr_accumulate_result(struct fd_acc_query
*aq
, void *buf
,
354 union pipe_query_result
*result
)
356 struct fd_batch_query_data
*data
= aq
->query_data
;
357 struct fd5_query_sample
*sp
= buf
;
359 for (unsigned i
= 0; i
< data
->num_query_entries
; i
++) {
360 result
->batch
[i
].u64
= sp
[i
].result
;
/* Provider hookup for perfcntr batch queries.  No static .size here:
 * the sample buffer size depends on the # of requested query_types and
 * is set per-query (aq->size) in fd5_create_batch_query().
 */
static const struct fd_acc_sample_provider perfcntr = {
		.query_type = FD_QUERY_FIRST_PERFCNTR,
		.resume = perfcntr_resume,
		.pause = perfcntr_pause,
		.result = perfcntr_accumulate_result,
};
372 static struct pipe_query
*
373 fd5_create_batch_query(struct pipe_context
*pctx
,
374 unsigned num_queries
, unsigned *query_types
)
376 struct fd_context
*ctx
= fd_context(pctx
);
377 struct fd_screen
*screen
= ctx
->screen
;
379 struct fd_acc_query
*aq
;
380 struct fd_batch_query_data
*data
;
382 data
= CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data
,
383 num_queries
* sizeof(data
->query_entries
[0]));
385 data
->screen
= screen
;
386 data
->num_query_entries
= num_queries
;
388 /* validate the requested query_types and ensure we don't try
389 * to request more query_types of a given group than we have
392 unsigned counters_per_group
[screen
->num_perfcntr_groups
];
393 memset(counters_per_group
, 0, sizeof(counters_per_group
));
395 for (unsigned i
= 0; i
< num_queries
; i
++) {
396 unsigned idx
= query_types
[i
] - FD_QUERY_FIRST_PERFCNTR
;
398 /* verify valid query_type, ie. is it actually a perfcntr? */
399 if ((query_types
[i
] < FD_QUERY_FIRST_PERFCNTR
) ||
400 (idx
>= screen
->num_perfcntr_queries
)) {
401 debug_printf("invalid batch query query_type: %u\n", query_types
[i
]);
405 struct fd_batch_query_entry
*entry
= &data
->query_entries
[i
];
406 struct pipe_driver_query_info
*pq
= &screen
->perfcntr_queries
[idx
];
408 entry
->gid
= pq
->group_id
;
410 /* the perfcntr_queries[] table flattens all the countables
411 * for each group in series, ie:
413 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
415 * So to find the countable index just step back through the
416 * table to find the first entry with the same group-id.
418 while (pq
> screen
->perfcntr_queries
) {
420 if (pq
->group_id
== entry
->gid
)
424 if (counters_per_group
[entry
->gid
] >=
425 screen
->perfcntr_groups
[entry
->gid
].num_counters
) {
426 debug_printf("too many counters for group %u\n", entry
->gid
);
430 counters_per_group
[entry
->gid
]++;
433 q
= fd_acc_create_query2(ctx
, 0, 0, &perfcntr
);
434 aq
= fd_acc_query(q
);
436 /* sample buffer size is based on # of queries: */
437 aq
->size
= num_queries
* sizeof(struct fd5_query_sample
);
438 aq
->query_data
= data
;
440 return (struct pipe_query
*)q
;
/* Hook up the a5xx query implementation: generic accumulated-query
 * entry points on the fd_context, plus the per-query-type sample
 * providers.
 */
void
fd5_query_context_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->create_query = fd_acc_create_query;
	ctx->query_set_stage = fd_acc_query_set_stage;

	pctx->create_batch_query = fd5_create_batch_query;

	fd_acc_query_register_provider(pctx, &occlusion_counter);
	fd_acc_query_register_provider(pctx, &occlusion_predicate);
	fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);

	fd_acc_query_register_provider(pctx, &time_elapsed);
	fd_acc_query_register_provider(pctx, &timestamp);
}