freedreno/query: add optional enable hook
[mesa.git] / src / gallium / drivers / freedreno / freedreno_query_hw.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32
33 #include "freedreno_query_hw.h"
34 #include "freedreno_context.h"
35 #include "freedreno_util.h"
36
37 struct fd_hw_sample_period {
38 struct fd_hw_sample *start, *end;
39 struct list_head list;
40 };
41
42 /* maps query_type to sample provider idx: */
43 static int pidx(unsigned query_type)
44 {
45 switch (query_type) {
46 case PIPE_QUERY_OCCLUSION_COUNTER:
47 return 0;
48 case PIPE_QUERY_OCCLUSION_PREDICATE:
49 return 1;
50 default:
51 return -1;
52 }
53 }
54
55 static struct fd_hw_sample *
56 get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
57 unsigned query_type)
58 {
59 struct fd_hw_sample *samp = NULL;
60 int idx = pidx(query_type);
61
62 if (!ctx->sample_cache[idx]) {
63 ctx->sample_cache[idx] =
64 ctx->sample_providers[idx]->get_sample(ctx, ring);
65 }
66
67 fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);
68
69 return samp;
70 }
71
72 static void
73 clear_sample_cache(struct fd_context *ctx)
74 {
75 int i;
76
77 for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
78 fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
79 }
80
81 static bool
82 is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
83 {
84 return !!(hq->provider->active & stage);
85 }
86
87
88 static void
89 resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
90 struct fd_ringbuffer *ring)
91 {
92 int idx = pidx(hq->provider->query_type);
93 assert(!hq->period);
94 ctx->active_providers |= (1 << idx);
95 hq->period = util_slab_alloc(&ctx->sample_period_pool);
96 list_inithead(&hq->period->list);
97 hq->period->start = get_sample(ctx, ring, hq->base.type);
98 /* NOTE: util_slab_alloc() does not zero out the buffer: */
99 hq->period->end = NULL;
100 }
101
102 static void
103 pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
104 struct fd_ringbuffer *ring)
105 {
106 int idx = pidx(hq->provider->query_type);
107 assert(hq->period && !hq->period->end);
108 assert(ctx->active_providers & (1 << idx));
109 hq->period->end = get_sample(ctx, ring, hq->base.type);
110 list_addtail(&hq->period->list, &hq->current_periods);
111 hq->period = NULL;
112 }
113
114 static void
115 destroy_periods(struct fd_context *ctx, struct list_head *list)
116 {
117 struct fd_hw_sample_period *period, *s;
118 LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
119 fd_hw_sample_reference(ctx, &period->start, NULL);
120 fd_hw_sample_reference(ctx, &period->end, NULL);
121 list_del(&period->list);
122 util_slab_free(&ctx->sample_period_pool, period);
123 }
124 }
125
126 static void
127 fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
128 {
129 struct fd_hw_query *hq = fd_hw_query(q);
130
131 destroy_periods(ctx, &hq->periods);
132 destroy_periods(ctx, &hq->current_periods);
133 list_del(&hq->list);
134
135 free(hq);
136 }
137
138 static boolean
139 fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
140 {
141 struct fd_hw_query *hq = fd_hw_query(q);
142 if (q->active)
143 return false;
144
145 /* begin_query() should clear previous results: */
146 destroy_periods(ctx, &hq->periods);
147
148 if (is_active(hq, ctx->stage))
149 resume_query(ctx, hq, ctx->ring);
150
151 q->active = true;
152
153 /* add to active list: */
154 list_del(&hq->list);
155 list_addtail(&hq->list, &ctx->active_queries);
156 return true;
157 }
158
159 static void
160 fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
161 {
162 struct fd_hw_query *hq = fd_hw_query(q);
163 if (!q->active)
164 return;
165 if (is_active(hq, ctx->stage))
166 pause_query(ctx, hq, ctx->ring);
167 q->active = false;
168 /* move to current list: */
169 list_del(&hq->list);
170 list_addtail(&hq->list, &ctx->current_queries);
171 }
172
173 /* helper to get ptr to specified sample: */
174 static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
175 {
176 return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
177 }
178
179 static boolean
180 fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
181 boolean wait, union pipe_query_result *result)
182 {
183 struct fd_hw_query *hq = fd_hw_query(q);
184 const struct fd_hw_sample_provider *p = hq->provider;
185 struct fd_hw_sample_period *period;
186
187 if (q->active)
188 return false;
189
190 /* if the app tries to read back the query result before the
191 * batch is submitted, that forces us to flush so that there
192 * are actually results to wait for:
193 */
194 if (!LIST_IS_EMPTY(&hq->list)) {
195 /* if app didn't actually trigger any cmdstream, then
196 * we have nothing to do:
197 */
198 if (!ctx->needs_flush)
199 return true;
200 DBG("reading query result forces flush!");
201 fd_context_render(&ctx->base);
202 }
203
204 util_query_clear_result(result, q->type);
205
206 if (LIST_IS_EMPTY(&hq->periods))
207 return true;
208
209 assert(LIST_IS_EMPTY(&hq->list));
210 assert(LIST_IS_EMPTY(&hq->current_periods));
211 assert(!hq->period);
212
213 /* if !wait, then check the last sample (the one most likely to
214 * not be ready yet) and bail if it is not ready:
215 */
216 if (!wait) {
217 int ret;
218
219 period = LIST_ENTRY(struct fd_hw_sample_period,
220 hq->periods.prev, list);
221
222 ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe,
223 DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
224 if (ret)
225 return false;
226
227 fd_bo_cpu_fini(period->end->bo);
228 }
229
230 /* sum the result across all sample periods: */
231 LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
232 struct fd_hw_sample *start = period->start;
233 struct fd_hw_sample *end = period->end;
234 unsigned i;
235
236 /* start and end samples should be from same batch: */
237 assert(start->bo == end->bo);
238 assert(start->num_tiles == end->num_tiles);
239
240 for (i = 0; i < start->num_tiles; i++) {
241 void *ptr;
242
243 fd_bo_cpu_prep(start->bo, ctx->screen->pipe,
244 DRM_FREEDRENO_PREP_READ);
245
246 ptr = fd_bo_map(start->bo);
247
248 p->accumulate_result(ctx, sampptr(period->start, i, ptr),
249 sampptr(period->end, i, ptr), result);
250
251 fd_bo_cpu_fini(start->bo);
252 }
253 }
254
255 return true;
256 }
257
258 static const struct fd_query_funcs hw_query_funcs = {
259 .destroy_query = fd_hw_destroy_query,
260 .begin_query = fd_hw_begin_query,
261 .end_query = fd_hw_end_query,
262 .get_query_result = fd_hw_get_query_result,
263 };
264
265 struct fd_query *
266 fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
267 {
268 struct fd_hw_query *hq;
269 struct fd_query *q;
270 int idx = pidx(query_type);
271
272 if ((idx < 0) || !ctx->sample_providers[idx])
273 return NULL;
274
275 hq = CALLOC_STRUCT(fd_hw_query);
276 if (!hq)
277 return NULL;
278
279 hq->provider = ctx->sample_providers[idx];
280
281 list_inithead(&hq->periods);
282 list_inithead(&hq->current_periods);
283 list_inithead(&hq->list);
284
285 q = &hq->base;
286 q->funcs = &hw_query_funcs;
287 q->type = query_type;
288
289 return q;
290 }
291
292 struct fd_hw_sample *
293 fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
294 {
295 struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
296 pipe_reference_init(&samp->reference, 1);
297 samp->size = size;
298 samp->offset = ctx->next_sample_offset;
299 /* NOTE: util_slab_alloc() does not zero out the buffer: */
300 samp->bo = NULL;
301 samp->num_tiles = 0;
302 samp->tile_stride = 0;
303 ctx->next_sample_offset += size;
304 return samp;
305 }
306
307 void
308 __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
309 {
310 if (samp->bo)
311 fd_bo_del(samp->bo);
312 util_slab_free(&ctx->sample_pool, samp);
313 }
314
315 static void
316 prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
317 uint32_t num_tiles, uint32_t tile_stride)
318 {
319 if (samp->bo) {
320 assert(samp->bo == bo);
321 assert(samp->num_tiles == num_tiles);
322 assert(samp->tile_stride == tile_stride);
323 return;
324 }
325 samp->bo = bo;
326 samp->num_tiles = num_tiles;
327 samp->tile_stride = tile_stride;
328 }
329
330 static void
331 prepare_query(struct fd_hw_query *hq, struct fd_bo *bo,
332 uint32_t num_tiles, uint32_t tile_stride)
333 {
334 struct fd_hw_sample_period *period, *s;
335
336 /* prepare all the samples in the query: */
337 LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
338 prepare_sample(period->start, bo, num_tiles, tile_stride);
339 prepare_sample(period->end, bo, num_tiles, tile_stride);
340
341 /* move from current_periods list to periods list: */
342 list_del(&period->list);
343 list_addtail(&period->list, &hq->periods);
344 }
345 }
346
347 static void
348 prepare_queries(struct fd_context *ctx, struct fd_bo *bo,
349 uint32_t num_tiles, uint32_t tile_stride,
350 struct list_head *list, bool remove)
351 {
352 struct fd_hw_query *hq, *s;
353 LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
354 prepare_query(hq, bo, num_tiles, tile_stride);
355 if (remove)
356 list_delinit(&hq->list);
357 }
358 }
359
360 /* called from gmem code once total storage requirements are known (ie.
361 * number of samples times number of tiles)
362 */
363 void
364 fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
365 {
366 uint32_t tile_stride = ctx->next_sample_offset;
367 struct fd_bo *bo;
368
369 if (ctx->query_bo)
370 fd_bo_del(ctx->query_bo);
371
372 if (tile_stride > 0) {
373 bo = fd_bo_new(ctx->dev, tile_stride * num_tiles,
374 DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
375 DRM_FREEDRENO_GEM_TYPE_KMEM);
376 } else {
377 bo = NULL;
378 }
379
380 ctx->query_bo = bo;
381 ctx->query_tile_stride = tile_stride;
382
383 prepare_queries(ctx, bo, num_tiles, tile_stride,
384 &ctx->active_queries, false);
385 prepare_queries(ctx, bo, num_tiles, tile_stride,
386 &ctx->current_queries, true);
387
388 /* reset things for next batch: */
389 ctx->next_sample_offset = 0;
390 }
391
392 void
393 fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
394 struct fd_ringbuffer *ring)
395 {
396 uint32_t tile_stride = ctx->query_tile_stride;
397 uint32_t offset = tile_stride * n;
398
399 /* bail if no queries: */
400 if (tile_stride == 0)
401 return;
402
403 fd_wfi(ctx, ring);
404 OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
405 OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0);
406 }
407
408 void
409 fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
410 enum fd_render_stage stage)
411 {
412 /* special case: internal blits (like mipmap level generation)
413 * go through normal draw path (via util_blitter_blit()).. but
414 * we need to ignore the FD_STAGE_DRAW which will be set, so we
415 * don't enable queries which should be paused during internal
416 * blits:
417 */
418 if ((ctx->stage == FD_STAGE_BLIT) &&
419 (stage != FD_STAGE_NULL))
420 return;
421
422 if (stage != ctx->stage) {
423 struct fd_hw_query *hq;
424 LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
425 bool was_active = is_active(hq, ctx->stage);
426 bool now_active = is_active(hq, stage);
427
428 if (now_active && !was_active)
429 resume_query(ctx, hq, ring);
430 else if (was_active && !now_active)
431 pause_query(ctx, hq, ring);
432 }
433 }
434 clear_sample_cache(ctx);
435 ctx->stage = stage;
436 }
437
438 /* call the provider->enable() for all the hw queries that were active
439 * in the current batch. This sets up perfctr selector regs statically
440 * for the duration of the batch.
441 */
442 void
443 fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
444 {
445 for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
446 if (ctx->active_providers & (1 << idx)) {
447 assert(ctx->sample_providers[idx]);
448 if (ctx->sample_providers[idx]->enable)
449 ctx->sample_providers[idx]->enable(ctx, ring);
450 }
451 }
452 ctx->active_providers = 0; /* clear it for next frame */
453 }
454
455 void
456 fd_hw_query_register_provider(struct pipe_context *pctx,
457 const struct fd_hw_sample_provider *provider)
458 {
459 struct fd_context *ctx = fd_context(pctx);
460 int idx = pidx(provider->query_type);
461
462 assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
463 assert(!ctx->sample_providers[idx]);
464
465 ctx->sample_providers[idx] = provider;
466 }
467
468 void
469 fd_hw_query_init(struct pipe_context *pctx)
470 {
471 struct fd_context *ctx = fd_context(pctx);
472
473 util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
474 16, UTIL_SLAB_SINGLETHREADED);
475 util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
476 16, UTIL_SLAB_SINGLETHREADED);
477 list_inithead(&ctx->active_queries);
478 list_inithead(&ctx->current_queries);
479 }
480
481 void
482 fd_hw_query_fini(struct pipe_context *pctx)
483 {
484 struct fd_context *ctx = fd_context(pctx);
485
486 util_slab_destroy(&ctx->sample_pool);
487 util_slab_destroy(&ctx->sample_period_pool);
488 }