freedreno: use prsc for hw queries
[mesa.git] / src / gallium / drivers / freedreno / freedreno_query_hw.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32
33 #include "freedreno_query_hw.h"
34 #include "freedreno_context.h"
35 #include "freedreno_resource.h"
36 #include "freedreno_util.h"
37
38 struct fd_hw_sample_period {
39 struct fd_hw_sample *start, *end;
40 struct list_head list;
41 };
42
43 /* maps query_type to sample provider idx: */
44 static int pidx(unsigned query_type)
45 {
46 switch (query_type) {
47 case PIPE_QUERY_OCCLUSION_COUNTER:
48 return 0;
49 case PIPE_QUERY_OCCLUSION_PREDICATE:
50 return 1;
51 /* TODO currently queries only emitted in main pass (not in binning pass)..
52 * which is fine for occlusion query, but pretty much not anything else.
53 */
54 case PIPE_QUERY_TIME_ELAPSED:
55 return 2;
56 case PIPE_QUERY_TIMESTAMP:
57 return 3;
58 default:
59 return -1;
60 }
61 }
62
63 static struct fd_hw_sample *
64 get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
65 unsigned query_type)
66 {
67 struct fd_hw_sample *samp = NULL;
68 int idx = pidx(query_type);
69
70 assume(idx >= 0); /* query never would have been created otherwise */
71
72 if (!ctx->sample_cache[idx]) {
73 ctx->sample_cache[idx] =
74 ctx->sample_providers[idx]->get_sample(ctx, ring);
75 ctx->batch->needs_flush = true;
76 }
77
78 fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);
79
80 return samp;
81 }
82
83 static void
84 clear_sample_cache(struct fd_context *ctx)
85 {
86 int i;
87
88 for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
89 fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
90 }
91
92 static bool
93 is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
94 {
95 return !!(hq->provider->active & stage);
96 }
97
98
99 static void
100 resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
101 struct fd_ringbuffer *ring)
102 {
103 int idx = pidx(hq->provider->query_type);
104 assert(idx >= 0); /* query never would have been created otherwise */
105 assert(!hq->period);
106 ctx->active_providers |= (1 << idx);
107 hq->period = util_slab_alloc(&ctx->sample_period_pool);
108 list_inithead(&hq->period->list);
109 hq->period->start = get_sample(ctx, ring, hq->base.type);
110 /* NOTE: util_slab_alloc() does not zero out the buffer: */
111 hq->period->end = NULL;
112 }
113
114 static void
115 pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
116 struct fd_ringbuffer *ring)
117 {
118 int idx = pidx(hq->provider->query_type);
119 assert(idx >= 0); /* query never would have been created otherwise */
120 assert(hq->period && !hq->period->end);
121 assert(ctx->active_providers & (1 << idx));
122 hq->period->end = get_sample(ctx, ring, hq->base.type);
123 list_addtail(&hq->period->list, &hq->current_periods);
124 hq->period = NULL;
125 }
126
127 static void
128 destroy_periods(struct fd_context *ctx, struct list_head *list)
129 {
130 struct fd_hw_sample_period *period, *s;
131 LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
132 fd_hw_sample_reference(ctx, &period->start, NULL);
133 fd_hw_sample_reference(ctx, &period->end, NULL);
134 list_del(&period->list);
135 util_slab_free(&ctx->sample_period_pool, period);
136 }
137 }
138
139 static void
140 fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
141 {
142 struct fd_hw_query *hq = fd_hw_query(q);
143
144 destroy_periods(ctx, &hq->periods);
145 destroy_periods(ctx, &hq->current_periods);
146 list_del(&hq->list);
147
148 free(hq);
149 }
150
151 static boolean
152 fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
153 {
154 struct fd_hw_query *hq = fd_hw_query(q);
155 if (q->active)
156 return false;
157
158 /* begin_query() should clear previous results: */
159 destroy_periods(ctx, &hq->periods);
160
161 if (is_active(hq, ctx->stage))
162 resume_query(ctx, hq, ctx->batch->draw);
163
164 q->active = true;
165
166 /* add to active list: */
167 list_del(&hq->list);
168 list_addtail(&hq->list, &ctx->active_queries);
169 return true;
170 }
171
172 static void
173 fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
174 {
175 struct fd_hw_query *hq = fd_hw_query(q);
176 /* there are a couple special cases, which don't have
177 * a matching ->begin_query():
178 */
179 if (skip_begin_query(q->type) && !q->active) {
180 fd_hw_begin_query(ctx, q);
181 }
182 if (!q->active)
183 return;
184 if (is_active(hq, ctx->stage))
185 pause_query(ctx, hq, ctx->batch->draw);
186 q->active = false;
187 /* move to current list: */
188 list_del(&hq->list);
189 list_addtail(&hq->list, &ctx->current_queries);
190 }
191
192 /* helper to get ptr to specified sample: */
193 static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
194 {
195 return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
196 }
197
198 static boolean
199 fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
200 boolean wait, union pipe_query_result *result)
201 {
202 struct fd_hw_query *hq = fd_hw_query(q);
203 const struct fd_hw_sample_provider *p = hq->provider;
204 struct fd_hw_sample_period *period;
205
206 if (q->active)
207 return false;
208
209 /* if the app tries to read back the query result before the
210 * batch is submitted, that forces us to flush so that there
211 * are actually results to wait for:
212 */
213 if (!LIST_IS_EMPTY(&hq->list)) {
214 /* if app didn't actually trigger any cmdstream, then
215 * we have nothing to do:
216 */
217 if (!ctx->batch->needs_flush)
218 return true;
219 DBG("reading query result forces flush!");
220 fd_batch_flush(ctx->batch);
221 }
222
223 util_query_clear_result(result, q->type);
224
225 if (LIST_IS_EMPTY(&hq->periods))
226 return true;
227
228 assert(LIST_IS_EMPTY(&hq->list));
229 assert(LIST_IS_EMPTY(&hq->current_periods));
230 assert(!hq->period);
231
232 /* if !wait, then check the last sample (the one most likely to
233 * not be ready yet) and bail if it is not ready:
234 */
235 if (!wait) {
236 int ret;
237
238 period = LIST_ENTRY(struct fd_hw_sample_period,
239 hq->periods.prev, list);
240
241 struct fd_resource *rsc = fd_resource(period->end->prsc);
242
243 ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe,
244 DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
245 if (ret)
246 return false;
247
248 fd_bo_cpu_fini(rsc->bo);
249 }
250
251 /* sum the result across all sample periods: */
252 LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
253 struct fd_hw_sample *start = period->start;
254 struct fd_hw_sample *end = period->end;
255 unsigned i;
256
257 /* start and end samples should be from same batch: */
258 assert(start->prsc == end->prsc);
259 assert(start->num_tiles == end->num_tiles);
260
261 struct fd_resource *rsc = fd_resource(start->prsc);
262
263 fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, DRM_FREEDRENO_PREP_READ);
264
265 void *ptr = fd_bo_map(rsc->bo);
266
267 for (i = 0; i < start->num_tiles; i++) {
268 p->accumulate_result(ctx, sampptr(period->start, i, ptr),
269 sampptr(period->end, i, ptr), result);
270 }
271
272 fd_bo_cpu_fini(rsc->bo);
273 }
274
275 return true;
276 }
277
278 static const struct fd_query_funcs hw_query_funcs = {
279 .destroy_query = fd_hw_destroy_query,
280 .begin_query = fd_hw_begin_query,
281 .end_query = fd_hw_end_query,
282 .get_query_result = fd_hw_get_query_result,
283 };
284
285 struct fd_query *
286 fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
287 {
288 struct fd_hw_query *hq;
289 struct fd_query *q;
290 int idx = pidx(query_type);
291
292 if ((idx < 0) || !ctx->sample_providers[idx])
293 return NULL;
294
295 hq = CALLOC_STRUCT(fd_hw_query);
296 if (!hq)
297 return NULL;
298
299 hq->provider = ctx->sample_providers[idx];
300
301 list_inithead(&hq->periods);
302 list_inithead(&hq->current_periods);
303 list_inithead(&hq->list);
304
305 q = &hq->base;
306 q->funcs = &hw_query_funcs;
307 q->type = query_type;
308
309 return q;
310 }
311
312 struct fd_hw_sample *
313 fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
314 {
315 struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
316 pipe_reference_init(&samp->reference, 1);
317 samp->size = size;
318 debug_assert(util_is_power_of_two(size));
319 ctx->next_sample_offset = align(ctx->next_sample_offset, size);
320 samp->offset = ctx->next_sample_offset;
321 /* NOTE: util_slab_alloc() does not zero out the buffer: */
322 samp->prsc = NULL;
323 samp->num_tiles = 0;
324 samp->tile_stride = 0;
325 ctx->next_sample_offset += size;
326 return samp;
327 }
328
329 void
330 __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
331 {
332 pipe_resource_reference(&samp->prsc, NULL);
333 util_slab_free(&ctx->sample_pool, samp);
334 }
335
336 static void
337 prepare_sample(struct fd_hw_sample *samp, struct pipe_resource *prsc,
338 uint32_t num_tiles, uint32_t tile_stride)
339 {
340 if (samp->prsc) {
341 assert(samp->prsc == prsc);
342 assert(samp->num_tiles == num_tiles);
343 assert(samp->tile_stride == tile_stride);
344 return;
345 }
346 pipe_resource_reference(&samp->prsc, prsc);
347 samp->num_tiles = num_tiles;
348 samp->tile_stride = tile_stride;
349 }
350
351 static void
352 prepare_query(struct fd_hw_query *hq, struct pipe_resource *prsc,
353 uint32_t num_tiles, uint32_t tile_stride)
354 {
355 struct fd_hw_sample_period *period, *s;
356
357 /* prepare all the samples in the query: */
358 LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
359 prepare_sample(period->start, prsc, num_tiles, tile_stride);
360 prepare_sample(period->end, prsc, num_tiles, tile_stride);
361
362 /* move from current_periods list to periods list: */
363 list_del(&period->list);
364 list_addtail(&period->list, &hq->periods);
365 }
366 }
367
368 static void
369 prepare_queries(struct fd_context *ctx, struct pipe_resource *prsc,
370 uint32_t num_tiles, uint32_t tile_stride,
371 struct list_head *list, bool remove)
372 {
373 struct fd_hw_query *hq, *s;
374 LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
375 prepare_query(hq, prsc, num_tiles, tile_stride);
376 if (remove)
377 list_delinit(&hq->list);
378 }
379 }
380
381 /* called from gmem code once total storage requirements are known (ie.
382 * number of samples times number of tiles)
383 */
384 void
385 fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
386 {
387 uint32_t tile_stride = ctx->next_sample_offset;
388 struct pipe_resource *prsc;
389
390 pipe_resource_reference(&ctx->query_buf, NULL);
391
392 if (tile_stride > 0) {
393 struct pipe_screen *pscreen = &ctx->screen->base;
394 struct pipe_resource templ = {
395 .target = PIPE_BUFFER,
396 .format = PIPE_FORMAT_R8_UNORM,
397 .bind = PIPE_BIND_QUERY_BUFFER,
398 .width0 = tile_stride * num_tiles,
399 .height0 = 1,
400 .depth0 = 1,
401 .array_size = 1,
402 .last_level = 0,
403 .nr_samples = 1,
404 };
405 prsc = pscreen->resource_create(pscreen, &templ);
406 } else {
407 prsc = NULL;
408 }
409
410 ctx->query_buf = prsc;
411 ctx->query_tile_stride = tile_stride;
412
413 prepare_queries(ctx, prsc, num_tiles, tile_stride,
414 &ctx->active_queries, false);
415 prepare_queries(ctx, prsc, num_tiles, tile_stride,
416 &ctx->current_queries, true);
417
418 /* reset things for next batch: */
419 ctx->next_sample_offset = 0;
420 }
421
422 void
423 fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
424 struct fd_ringbuffer *ring)
425 {
426 uint32_t tile_stride = ctx->query_tile_stride;
427 uint32_t offset = tile_stride * n;
428
429 /* bail if no queries: */
430 if (tile_stride == 0)
431 return;
432
433 fd_wfi(ctx, ring);
434 OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
435 OUT_RELOCW(ring, fd_resource(ctx->query_buf)->bo, offset, 0, 0);
436 }
437
438 void
439 fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
440 enum fd_render_stage stage)
441 {
442 /* special case: internal blits (like mipmap level generation)
443 * go through normal draw path (via util_blitter_blit()).. but
444 * we need to ignore the FD_STAGE_DRAW which will be set, so we
445 * don't enable queries which should be paused during internal
446 * blits:
447 */
448 if ((ctx->stage == FD_STAGE_BLIT) &&
449 (stage != FD_STAGE_NULL))
450 return;
451
452 if (stage != ctx->stage) {
453 struct fd_hw_query *hq;
454 LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
455 bool was_active = is_active(hq, ctx->stage);
456 bool now_active = is_active(hq, stage);
457
458 if (now_active && !was_active)
459 resume_query(ctx, hq, ring);
460 else if (was_active && !now_active)
461 pause_query(ctx, hq, ring);
462 }
463 }
464 clear_sample_cache(ctx);
465 ctx->stage = stage;
466 }
467
468 /* call the provider->enable() for all the hw queries that were active
469 * in the current batch. This sets up perfctr selector regs statically
470 * for the duration of the batch.
471 */
472 void
473 fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
474 {
475 for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
476 if (ctx->active_providers & (1 << idx)) {
477 assert(ctx->sample_providers[idx]);
478 if (ctx->sample_providers[idx]->enable)
479 ctx->sample_providers[idx]->enable(ctx, ring);
480 }
481 }
482 ctx->active_providers = 0; /* clear it for next frame */
483 }
484
485 void
486 fd_hw_query_register_provider(struct pipe_context *pctx,
487 const struct fd_hw_sample_provider *provider)
488 {
489 struct fd_context *ctx = fd_context(pctx);
490 int idx = pidx(provider->query_type);
491
492 assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
493 assert(!ctx->sample_providers[idx]);
494
495 ctx->sample_providers[idx] = provider;
496 }
497
498 void
499 fd_hw_query_init(struct pipe_context *pctx)
500 {
501 struct fd_context *ctx = fd_context(pctx);
502
503 util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
504 16, UTIL_SLAB_SINGLETHREADED);
505 util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
506 16, UTIL_SLAB_SINGLETHREADED);
507 list_inithead(&ctx->active_queries);
508 list_inithead(&ctx->current_queries);
509 }
510
511 void
512 fd_hw_query_fini(struct pipe_context *pctx)
513 {
514 struct fd_context *ctx = fd_context(pctx);
515
516 util_slab_destroy(&ctx->sample_pool);
517 util_slab_destroy(&ctx->sample_period_pool);
518 }