freedreno/a4xx: timestamp queries
src/gallium/drivers/freedreno/freedreno_query_hw.c
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"

#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"

struct fd_hw_sample_period {
    struct fd_hw_sample *start, *end;
    struct list_head list;
};

/* maps query_type to sample provider idx: */
static int pidx(unsigned query_type)
{
    switch (query_type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
        return 0;
    case PIPE_QUERY_OCCLUSION_PREDICATE:
        return 1;
    /* TODO: currently queries are only emitted in the main pass (not the
     * binning pass), which is fine for occlusion queries but not for much
     * else.
     */
    case PIPE_QUERY_TIME_ELAPSED:
        return 2;
    case PIPE_QUERY_TIMESTAMP:
        return 3;
    default:
        return -1;
    }
}

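/* Get a sample for the given query type.  Samples are cached per provider
 * in ctx->sample_cache (cleared on stage transitions), so concurrent
 * queries of the same type share a single sample:
 */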
static struct fd_hw_sample *
get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
        unsigned query_type)
{
    struct fd_hw_sample *samp = NULL;
    int idx = pidx(query_type);

    assume(idx >= 0);   /* query never would have been created otherwise */

    if (!ctx->sample_cache[idx]) {
        ctx->sample_cache[idx] =
            ctx->sample_providers[idx]->get_sample(ctx, ring);
        ctx->needs_flush = true;
    }

    fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);

    return samp;
}

static void
clear_sample_cache(struct fd_context *ctx)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
        fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
}

static bool
is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
{
    return !!(hq->provider->active & stage);
}

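/* A query accumulates its result over one or more sample periods.  A
 * period is a (start, end) pair of samples: resume_query() opens a new
 * period, and pause_query() closes it and moves it onto the query's
 * current_periods list:
 */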
static void
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
        struct fd_ringbuffer *ring)
{
    int idx = pidx(hq->provider->query_type);
    assert(idx >= 0);   /* query never would have been created otherwise */
    assert(!hq->period);
    ctx->active_providers |= (1 << idx);
    hq->period = util_slab_alloc(&ctx->sample_period_pool);
    list_inithead(&hq->period->list);
    hq->period->start = get_sample(ctx, ring, hq->base.type);
    /* NOTE: util_slab_alloc() does not zero out the buffer: */
    hq->period->end = NULL;
}

static void
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
        struct fd_ringbuffer *ring)
{
    int idx = pidx(hq->provider->query_type);
    assert(idx >= 0);   /* query never would have been created otherwise */
    assert(hq->period && !hq->period->end);
    assert(ctx->active_providers & (1 << idx));
    hq->period->end = get_sample(ctx, ring, hq->base.type);
    list_addtail(&hq->period->list, &hq->current_periods);
    hq->period = NULL;
}

static void
destroy_periods(struct fd_context *ctx, struct list_head *list)
{
    struct fd_hw_sample_period *period, *s;
    LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
        fd_hw_sample_reference(ctx, &period->start, NULL);
        fd_hw_sample_reference(ctx, &period->end, NULL);
        list_del(&period->list);
        util_slab_free(&ctx->sample_period_pool, period);
    }
}

static void
fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
    struct fd_hw_query *hq = fd_hw_query(q);

    destroy_periods(ctx, &hq->periods);
    destroy_periods(ctx, &hq->current_periods);
    list_del(&hq->list);

    free(hq);
}

static boolean
fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
{
    struct fd_hw_query *hq = fd_hw_query(q);
    if (q->active)
        return false;

    /* begin_query() should clear previous results: */
    destroy_periods(ctx, &hq->periods);

    if (is_active(hq, ctx->stage))
        resume_query(ctx, hq, ctx->ring);

    q->active = true;

    /* add to active list: */
    list_del(&hq->list);
    list_addtail(&hq->list, &ctx->active_queries);
    return true;
}

static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
{
    struct fd_hw_query *hq = fd_hw_query(q);
    /* there are a couple of special cases which don't have
     * a matching ->begin_query():
     */
    if (skip_begin_query(q->type) && !q->active) {
        fd_hw_begin_query(ctx, q);
    }
    if (!q->active)
        return;
    if (is_active(hq, ctx->stage))
        pause_query(ctx, hq, ctx->ring);
    q->active = false;
    /* move to current list: */
    list_del(&hq->list);
    list_addtail(&hq->list, &ctx->current_queries);
}

/* helper to get ptr to specified sample: */
static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
{
    return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
}

static boolean
fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
        boolean wait, union pipe_query_result *result)
{
    struct fd_hw_query *hq = fd_hw_query(q);
    const struct fd_hw_sample_provider *p = hq->provider;
    struct fd_hw_sample_period *period;

    if (q->active)
        return false;

    /* if the app tries to read back the query result before the
     * batch is submitted, that forces us to flush so that there
     * are actually results to wait for:
     */
    if (!LIST_IS_EMPTY(&hq->list)) {
        /* if app didn't actually trigger any cmdstream, then
         * we have nothing to do:
         */
        if (!ctx->needs_flush)
            return true;
        DBG("reading query result forces flush!");
        fd_context_render(&ctx->base);
    }

    util_query_clear_result(result, q->type);

    if (LIST_IS_EMPTY(&hq->periods))
        return true;

    assert(LIST_IS_EMPTY(&hq->list));
    assert(LIST_IS_EMPTY(&hq->current_periods));
    assert(!hq->period);

    /* if !wait, then check the last sample (the one most likely to
     * not be ready yet) and bail if it is not ready:
     */
    if (!wait) {
        int ret;

        period = LIST_ENTRY(struct fd_hw_sample_period,
                hq->periods.prev, list);

        ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe,
                DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
        if (ret)
            return false;

        fd_bo_cpu_fini(period->end->bo);
    }

    /* sum the result across all sample periods: */
    LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
        struct fd_hw_sample *start = period->start;
        struct fd_hw_sample *end = period->end;
        unsigned i;

        /* start and end samples should be from same batch: */
        assert(start->bo == end->bo);
        assert(start->num_tiles == end->num_tiles);

        for (i = 0; i < start->num_tiles; i++) {
            void *ptr;

            fd_bo_cpu_prep(start->bo, ctx->screen->pipe,
                    DRM_FREEDRENO_PREP_READ);

            ptr = fd_bo_map(start->bo);

            p->accumulate_result(ctx, sampptr(period->start, i, ptr),
                    sampptr(period->end, i, ptr), result);

            fd_bo_cpu_fini(start->bo);
        }
    }

    return true;
}

static const struct fd_query_funcs hw_query_funcs = {
    .destroy_query = fd_hw_destroy_query,
    .begin_query = fd_hw_begin_query,
    .end_query = fd_hw_end_query,
    .get_query_result = fd_hw_get_query_result,
};

struct fd_query *
fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
{
    struct fd_hw_query *hq;
    struct fd_query *q;
    int idx = pidx(query_type);

    if ((idx < 0) || !ctx->sample_providers[idx])
        return NULL;

    hq = CALLOC_STRUCT(fd_hw_query);
    if (!hq)
        return NULL;

    hq->provider = ctx->sample_providers[idx];

    list_inithead(&hq->periods);
    list_inithead(&hq->current_periods);
    list_inithead(&hq->list);

    q = &hq->base;
    q->funcs = &hw_query_funcs;
    q->type = query_type;

    return q;
}

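/* Allocate a sample and reserve space for it in the per-tile sample buffer.
 * Only the size/offset are assigned here; the backing bo, number of tiles,
 * and tile stride are filled in later by fd_hw_query_prepare(), once the
 * gmem code knows them:
 */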
struct fd_hw_sample *
fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
{
    struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
    pipe_reference_init(&samp->reference, 1);
    samp->size = size;
    debug_assert(util_is_power_of_two(size));
    ctx->next_sample_offset = align(ctx->next_sample_offset, size);
    samp->offset = ctx->next_sample_offset;
    /* NOTE: util_slab_alloc() does not zero out the buffer: */
    samp->bo = NULL;
    samp->num_tiles = 0;
    samp->tile_stride = 0;
    ctx->next_sample_offset += size;
    return samp;
}

void
__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
{
    if (samp->bo)
        fd_bo_del(samp->bo);
    util_slab_free(&ctx->sample_pool, samp);
}

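/* Once the gmem code knows the backing bo and per-tile layout, patch up
 * the samples recorded for the batch so results can be read back later:
 */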
static void
prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
        uint32_t num_tiles, uint32_t tile_stride)
{
    if (samp->bo) {
        assert(samp->bo == bo);
        assert(samp->num_tiles == num_tiles);
        assert(samp->tile_stride == tile_stride);
        return;
    }
    samp->bo = fd_bo_ref(bo);
    samp->num_tiles = num_tiles;
    samp->tile_stride = tile_stride;
}

static void
prepare_query(struct fd_hw_query *hq, struct fd_bo *bo,
        uint32_t num_tiles, uint32_t tile_stride)
{
    struct fd_hw_sample_period *period, *s;

    /* prepare all the samples in the query: */
    LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
        prepare_sample(period->start, bo, num_tiles, tile_stride);
        prepare_sample(period->end, bo, num_tiles, tile_stride);

        /* move from current_periods list to periods list: */
        list_del(&period->list);
        list_addtail(&period->list, &hq->periods);
    }
}

static void
prepare_queries(struct fd_context *ctx, struct fd_bo *bo,
        uint32_t num_tiles, uint32_t tile_stride,
        struct list_head *list, bool remove)
{
    struct fd_hw_query *hq, *s;
    LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
        prepare_query(hq, bo, num_tiles, tile_stride);
        if (remove)
            list_delinit(&hq->list);
    }
}

/* called from gmem code once total storage requirements are known (i.e.
 * number of samples times number of tiles):
 */
void
fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
{
    uint32_t tile_stride = ctx->next_sample_offset;
    struct fd_bo *bo;

    if (ctx->query_bo)
        fd_bo_del(ctx->query_bo);

    if (tile_stride > 0) {
        bo = fd_bo_new(ctx->dev, tile_stride * num_tiles,
                DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
                DRM_FREEDRENO_GEM_TYPE_KMEM);
    } else {
        bo = NULL;
    }

    ctx->query_bo = bo;
    ctx->query_tile_stride = tile_stride;

    prepare_queries(ctx, bo, num_tiles, tile_stride,
            &ctx->active_queries, false);
    prepare_queries(ctx, bo, num_tiles, tile_stride,
            &ctx->current_queries, true);

    /* reset things for next batch: */
    ctx->next_sample_offset = 0;
}

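/* Called from the gmem code once per tile, to point HW_QUERY_BASE_REG at
 * the slice of the sample buffer belonging to tile 'n':
 */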
void
fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
        struct fd_ringbuffer *ring)
{
    uint32_t tile_stride = ctx->query_tile_stride;
    uint32_t offset = tile_stride * n;

    /* bail if no queries: */
    if (tile_stride == 0)
        return;

    fd_wfi(ctx, ring);
    OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
    OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0);
}

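/* Track render-stage transitions, pausing/resuming queries whose providers
 * are only active during particular stages:
 */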
void
fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
        enum fd_render_stage stage)
{
    /* special case: internal blits (like mipmap level generation)
     * go through the normal draw path (via util_blitter_blit()).. but
     * we need to ignore the FD_STAGE_DRAW which will be set, so we
     * don't enable queries which should be paused during internal
     * blits:
     */
    if ((ctx->stage == FD_STAGE_BLIT) &&
            (stage != FD_STAGE_NULL))
        return;

    if (stage != ctx->stage) {
        struct fd_hw_query *hq;
        LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
            bool was_active = is_active(hq, ctx->stage);
            bool now_active = is_active(hq, stage);

            if (now_active && !was_active)
                resume_query(ctx, hq, ring);
            else if (was_active && !now_active)
                pause_query(ctx, hq, ring);
        }
    }
    clear_sample_cache(ctx);
    ctx->stage = stage;
}

/* call the provider->enable() for all the hw queries that were active
 * in the current batch.  This sets up perfctr selector regs statically
 * for the duration of the batch.
 */
void
fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
    for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
        if (ctx->active_providers & (1 << idx)) {
            assert(ctx->sample_providers[idx]);
            if (ctx->sample_providers[idx]->enable)
                ctx->sample_providers[idx]->enable(ctx, ring);
        }
    }
    ctx->active_providers = 0;  /* clear it for next frame */
}

void
fd_hw_query_register_provider(struct pipe_context *pctx,
        const struct fd_hw_sample_provider *provider)
{
    struct fd_context *ctx = fd_context(pctx);
    int idx = pidx(provider->query_type);

    assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
    assert(!ctx->sample_providers[idx]);

    ctx->sample_providers[idx] = provider;
}

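/* Per-context init/fini for the hw-query machinery: slab pools for samples
 * and sample periods, plus the active/current query lists:
 */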
void
fd_hw_query_init(struct pipe_context *pctx)
{
    struct fd_context *ctx = fd_context(pctx);

    util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
            16, UTIL_SLAB_SINGLETHREADED);
    util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
            16, UTIL_SLAB_SINGLETHREADED);
    list_inithead(&ctx->active_queries);
    list_inithead(&ctx->current_queries);
}

void
fd_hw_query_fini(struct pipe_context *pctx)
{
    struct fd_context *ctx = fd_context(pctx);

    util_slab_destroy(&ctx->sample_pool);
    util_slab_destroy(&ctx->sample_period_pool);
}