2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Rob Clark <robclark@freedesktop.org>
27 /* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
29 #include "freedreno_query_acc.h"
30 #include "freedreno_resource.h"
32 #include "fd5_context.h"
33 #include "fd5_format.h"
34 #include "fd5_query.h"
36 struct PACKED fd5_query_sample
{
42 /* offset of a single field of an array of fd5_query_sample: */
43 #define query_sample_idx(aq, idx, field) \
44 fd_resource((aq)->prsc)->bo, \
45 (idx * sizeof(struct fd5_query_sample)) + \
46 offsetof(struct fd5_query_sample, field), \
/* offset of a single field of fd5_query_sample: */
/* Convenience wrapper around query_sample_idx() for the common
 * single-sample (index 0) case.
 */
#define query_sample(aq, field) \
	query_sample_idx(aq, 0, field)
56 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
/* Start (or resume) occlusion counting for @aq in @batch.
 *
 * Programs the RB sample-count block to copy its counter into the
 * 'start' field of the query sample buffer, then emits a ZPASS_DONE
 * event which presumably triggers that copy (see the A5xx-Queries
 * wiki page referenced at the top of this file).
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	/* destination address for the sample-count copy: */
	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOCW(ring, query_sample(aq, start));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);

	/* track how many samples-passed queries are currently active: */
	fd5_context(batch->ctx)->samples_passed_queries++;
}
/* Stop (or pause) occlusion counting for @aq: snapshot the counter
 * into the 'stop' field and accumulate (stop - start) into 'result'
 * entirely on the GPU.
 */
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	/* seed 'stop' with an all-ones sentinel so the CP_WAIT_REG_MEM
	 * below can detect when the sample-count copy has landed:
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 4);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);

	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOCW(ring, query_sample(aq, stop));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);

	/* NOTE(review): this appears to poll 'stop' until it changes from
	 * the sentinel written above; the 0x14/0x10 control words were
	 * already marked XXX upstream and are unconfirmed.
	 */
	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
	OUT_RING(ring, 0x00000014);   // XXX
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0x00000010);   // XXX

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */

	fd5_context(batch->ctx)->samples_passed_queries--;
}
120 occlusion_counter_result(struct fd_acc_query
*aq
, void *buf
,
121 union pipe_query_result
*result
)
123 struct fd5_query_sample
*sp
= buf
;
124 result
->u64
= sp
->result
;
128 occlusion_predicate_result(struct fd_acc_query
*aq
, void *buf
,
129 union pipe_query_result
*result
)
131 struct fd5_query_sample
*sp
= buf
;
132 result
->b
= !!sp
->result
;
/* Provider for PIPE_QUERY_OCCLUSION_COUNTER: raw samples-passed count. */
static const struct fd_acc_sample_provider occlusion_counter = {
		.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
		.active = FD_STAGE_DRAW,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_counter_result,
};
/* Provider for PIPE_QUERY_OCCLUSION_PREDICATE: same GPU commands as
 * the counter query, only the result interpretation (boolean) differs.
 */
static const struct fd_acc_sample_provider occlusion_predicate = {
		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
		.active = FD_STAGE_DRAW,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_predicate_result,
};
/* Provider for PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: reuses the
 * exact predicate path (an exact answer is a valid conservative one).
 */
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
		.active = FD_STAGE_DRAW,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_predicate_result,
};
/* Capture the starting timestamp for @aq: a CACHE_FLUSH_AND_INV_EVENT
 * with the TIMESTAMP bit makes the CP write the current timestamp to
 * the 'start' field of the sample buffer.
 */
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOCW(ring, query_sample(aq, start));
	OUT_RING(ring, 0x00000000);
}
/* Capture the ending timestamp for @aq into the 'stop' field, then
 * accumulate the elapsed ticks (stop - start) into 'result' on the GPU.
 */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0x00000000);

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */
}
/* Convert always-on timer ticks to nanoseconds. */
static uint64_t
ticks_to_ns(uint32_t ts)
{
	/* This is based on the 19.2MHz always-on rbbm timer.
	 *
	 * TODO we should probably query this value from kernel..
	 */
	/* Widen before multiplying: `ts * 52` in 32-bit arithmetic wraps
	 * for timestamps beyond ~82.6M ticks (~4.3s on this timer), which
	 * silently truncated the result before it was converted to u64.
	 */
	return (uint64_t)ts * (1000000000 / 19200000);
}
215 time_elapsed_accumulate_result(struct fd_acc_query
*aq
, void *buf
,
216 union pipe_query_result
*result
)
218 struct fd5_query_sample
*sp
= buf
;
219 result
->u64
= ticks_to_ns(sp
->result
);
223 timestamp_accumulate_result(struct fd_acc_query
*aq
, void *buf
,
224 union pipe_query_result
*result
)
226 struct fd5_query_sample
*sp
= buf
;
227 result
->u64
= ticks_to_ns(sp
->result
);
/* Provider for PIPE_QUERY_TIME_ELAPSED: also active during clears so
 * clear time is included in the measurement.
 */
static const struct fd_acc_sample_provider time_elapsed = {
		.query_type = PIPE_QUERY_TIME_ELAPSED,
		.active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
		.size = sizeof(struct fd5_query_sample),
		.resume = timestamp_resume,
		.pause = timestamp_pause,
		.result = time_elapsed_accumulate_result,
};
239 /* NOTE: timestamp query isn't going to give terribly sensible results
240 * on a tiler. But it is needed by qapitrace profile heatmap. If you
241 * add in a binning pass, the results get even more non-sensical. So
242 * we just return the timestamp on the first tile and hope that is
243 * kind of good enough.
/* Provider for PIPE_QUERY_TIMESTAMP (see the caveat about tilers in
 * the comment above).
 */
static const struct fd_acc_sample_provider timestamp = {
		.query_type = PIPE_QUERY_TIMESTAMP,
		.active = FD_STAGE_ALL,
		.size = sizeof(struct fd5_query_sample),
		.resume = timestamp_resume,
		.pause = timestamp_pause,
		.result = timestamp_accumulate_result,
};
256 * Performance Counter (batch) queries:
258 * Only one of these is active at a time, per design of the gallium
259 * batch_query API design. On perfcntr query tracks N query_types,
260 * each of which has a 'fd_batch_query_entry' that maps it back to
261 * the associated group and counter.
/* Maps one query_type of a batch query back to its perfcntr group and
 * countable within that group.
 */
struct fd_batch_query_entry {
	uint8_t gid;      /* group-id */
	uint8_t cid;      /* countable-id within the group */
};
/* Private data for one batch query: the screen (for perfcntr group
 * info) plus a flexible array of per-query_type entries.
 */
struct fd_batch_query_data {
	struct fd_screen *screen;
	unsigned num_query_entries;
	struct fd_batch_query_entry query_entries[];
};
/* Program the counter selects for each requested countable, then
 * snapshot the starting counter values into the sample buffer.
 */
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	/* # of counters already claimed in each group so far: */
	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* configure performance counters for the requested queries: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		/* claim the next free counter in this group: */
		unsigned counter_idx = counters_per_group[entry->gid]++;

		/* create_batch_query() already rejected over-subscription: */
		debug_assert(counter_idx < g->num_counters);

		/* select which countable this physical counter counts: */
		OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
		OUT_RING(ring, g->countables[entry->cid].selector);
	}

	/* reset so the second pass walks counters in the same order: */
	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* and snapshot the start values */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		/* copy the 64b counter value into entry i's 'start' field: */
		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
				CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOCW(ring, query_sample_idx(aq, i, start));
	}
}
/* Snapshot the ending counter values and accumulate the per-entry
 * deltas (stop - start) into 'result' on the GPU.
 */
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	/* walk counters in the same order perfcntr_resume() assigned them: */
	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* TODO do we need to bother to turn anything off? */

	/* snapshot the end values: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		/* copy the 64b counter value into entry i's 'stop' field: */
		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
				CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOCW(ring, query_sample_idx(aq, i, stop));
	}

	/* and compute the result: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		/* result += stop - start: */
		OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
		OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
				CP_MEM_TO_MEM_0_NEG_C);
		OUT_RELOCW(ring, query_sample_idx(aq, i, result));     /* dst */
		OUT_RELOC(ring, query_sample_idx(aq, i, result));      /* srcA */
		OUT_RELOC(ring, query_sample_idx(aq, i, stop));        /* srcB */
		OUT_RELOC(ring, query_sample_idx(aq, i, start));       /* srcC */
	}
}
356 perfcntr_accumulate_result(struct fd_acc_query
*aq
, void *buf
,
357 union pipe_query_result
*result
)
359 struct fd_batch_query_data
*data
= aq
->query_data
;
360 struct fd5_query_sample
*sp
= buf
;
362 for (unsigned i
= 0; i
< data
->num_query_entries
; i
++) {
363 result
->batch
[i
].u64
= sp
[i
].result
;
/* Provider for batch (perfcntr) queries.  No .size here: the sample
 * buffer size depends on the number of queries and is set per-query
 * in fd5_create_batch_query().
 */
static const struct fd_acc_sample_provider perfcntr = {
		.query_type = FD_QUERY_FIRST_PERFCNTR,
		.active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
		.resume = perfcntr_resume,
		.pause = perfcntr_pause,
		.result = perfcntr_accumulate_result,
};
375 static struct pipe_query
*
376 fd5_create_batch_query(struct pipe_context
*pctx
,
377 unsigned num_queries
, unsigned *query_types
)
379 struct fd_context
*ctx
= fd_context(pctx
);
380 struct fd_screen
*screen
= ctx
->screen
;
382 struct fd_acc_query
*aq
;
383 struct fd_batch_query_data
*data
;
385 data
= CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data
,
386 num_queries
* sizeof(data
->query_entries
[0]));
388 data
->screen
= screen
;
389 data
->num_query_entries
= num_queries
;
391 /* validate the requested query_types and ensure we don't try
392 * to request more query_types of a given group than we have
395 unsigned counters_per_group
[screen
->num_perfcntr_groups
];
396 memset(counters_per_group
, 0, sizeof(counters_per_group
));
398 for (unsigned i
= 0; i
< num_queries
; i
++) {
399 unsigned idx
= query_types
[i
] - FD_QUERY_FIRST_PERFCNTR
;
401 /* verify valid query_type, ie. is it actually a perfcntr? */
402 if ((query_types
[i
] < FD_QUERY_FIRST_PERFCNTR
) ||
403 (idx
>= screen
->num_perfcntr_queries
)) {
404 debug_printf("invalid batch query query_type: %u\n", query_types
[i
]);
408 struct fd_batch_query_entry
*entry
= &data
->query_entries
[i
];
409 struct pipe_driver_query_info
*pq
= &screen
->perfcntr_queries
[idx
];
411 entry
->gid
= pq
->group_id
;
413 /* the perfcntr_queries[] table flattens all the countables
414 * for each group in series, ie:
416 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
418 * So to find the countable index just step back through the
419 * table to find the first entry with the same group-id.
421 while (pq
> screen
->perfcntr_queries
) {
423 if (pq
->group_id
== entry
->gid
)
427 if (counters_per_group
[entry
->gid
] >=
428 screen
->perfcntr_groups
[entry
->gid
].num_counters
) {
429 debug_printf("too many counters for group %u\n", entry
->gid
);
433 counters_per_group
[entry
->gid
]++;
436 q
= fd_acc_create_query2(ctx
, 0, 0, &perfcntr
);
437 aq
= fd_acc_query(q
);
439 /* sample buffer size is based on # of queries: */
440 aq
->size
= num_queries
* sizeof(struct fd5_query_sample
);
441 aq
->query_data
= data
;
443 return (struct pipe_query
*)q
;
/* Hook the a5xx query implementation up to the context: the shared
 * accumulated-query (acc) entrypoints plus each sample provider.
 */
void
fd5_query_context_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->create_query = fd_acc_create_query;
	ctx->query_set_stage = fd_acc_query_set_stage;

	pctx->create_batch_query = fd5_create_batch_query;

	fd_acc_query_register_provider(pctx, &occlusion_counter);
	fd_acc_query_register_provider(pctx, &occlusion_predicate);
	fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);

	fd_acc_query_register_provider(pctx, &time_elapsed);
	fd_acc_query_register_provider(pctx, &timestamp);
}