freedreno: threaded batch flush
[mesa.git] / src / gallium / drivers / freedreno / freedreno_query_hw.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32
33 #include "freedreno_query_hw.h"
34 #include "freedreno_context.h"
35 #include "freedreno_resource.h"
36 #include "freedreno_util.h"
37
38 struct fd_hw_sample_period {
39 struct fd_hw_sample *start, *end;
40 struct list_head list;
41 };
42
43 /* maps query_type to sample provider idx: */
44 static int pidx(unsigned query_type)
45 {
46 switch (query_type) {
47 case PIPE_QUERY_OCCLUSION_COUNTER:
48 return 0;
49 case PIPE_QUERY_OCCLUSION_PREDICATE:
50 return 1;
51 /* TODO currently queries only emitted in main pass (not in binning pass)..
52 * which is fine for occlusion query, but pretty much not anything else.
53 */
54 case PIPE_QUERY_TIME_ELAPSED:
55 return 2;
56 case PIPE_QUERY_TIMESTAMP:
57 return 3;
58 default:
59 return -1;
60 }
61 }
62
63 static struct fd_hw_sample *
64 get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
65 unsigned query_type)
66 {
67 struct fd_context *ctx = batch->ctx;
68 struct fd_hw_sample *samp = NULL;
69 int idx = pidx(query_type);
70
71 assume(idx >= 0); /* query never would have been created otherwise */
72
73 if (!batch->sample_cache[idx]) {
74 struct fd_hw_sample *new_samp =
75 ctx->sample_providers[idx]->get_sample(batch, ring);
76 fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
77 util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
78 batch->needs_flush = true;
79 }
80
81 fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
82
83 return samp;
84 }
85
86 static void
87 clear_sample_cache(struct fd_batch *batch)
88 {
89 int i;
90
91 for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
92 fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
93 }
94
95 static bool
96 is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
97 {
98 return !!(hq->provider->active & stage);
99 }
100
101
102 static void
103 resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
104 struct fd_ringbuffer *ring)
105 {
106 int idx = pidx(hq->provider->query_type);
107 assert(idx >= 0); /* query never would have been created otherwise */
108 assert(!hq->period);
109 batch->active_providers |= (1 << idx);
110 hq->period = util_slab_alloc(&batch->ctx->sample_period_pool);
111 list_inithead(&hq->period->list);
112 hq->period->start = get_sample(batch, ring, hq->base.type);
113 /* NOTE: util_slab_alloc() does not zero out the buffer: */
114 hq->period->end = NULL;
115 }
116
117 static void
118 pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
119 struct fd_ringbuffer *ring)
120 {
121 int idx = pidx(hq->provider->query_type);
122 assert(idx >= 0); /* query never would have been created otherwise */
123 assert(hq->period && !hq->period->end);
124 assert(batch->active_providers & (1 << idx));
125 hq->period->end = get_sample(batch, ring, hq->base.type);
126 list_addtail(&hq->period->list, &hq->periods);
127 hq->period = NULL;
128 }
129
130 static void
131 destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
132 {
133 struct fd_hw_sample_period *period, *s;
134 LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->periods, list) {
135 fd_hw_sample_reference(ctx, &period->start, NULL);
136 fd_hw_sample_reference(ctx, &period->end, NULL);
137 list_del(&period->list);
138 util_slab_free(&ctx->sample_period_pool, period);
139 }
140 }
141
142 static void
143 fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
144 {
145 struct fd_hw_query *hq = fd_hw_query(q);
146
147 destroy_periods(ctx, hq);
148 list_del(&hq->list);
149
150 free(hq);
151 }
152
153 static boolean
154 fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
155 {
156 struct fd_batch *batch = ctx->batch;
157 struct fd_hw_query *hq = fd_hw_query(q);
158
159 if (q->active)
160 return false;
161
162 /* begin_query() should clear previous results: */
163 destroy_periods(ctx, hq);
164
165 if (batch && is_active(hq, batch->stage))
166 resume_query(batch, hq, batch->draw);
167
168 q->active = true;
169
170 /* add to active list: */
171 assert(list_empty(&hq->list));
172 list_addtail(&hq->list, &ctx->active_queries);
173
174 return true;
175 }
176
177 static void
178 fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
179 {
180 struct fd_batch *batch = ctx->batch;
181 struct fd_hw_query *hq = fd_hw_query(q);
182 /* there are a couple special cases, which don't have
183 * a matching ->begin_query():
184 */
185 if (skip_begin_query(q->type) && !q->active) {
186 fd_hw_begin_query(ctx, q);
187 }
188 if (!q->active)
189 return;
190 if (batch && is_active(hq, batch->stage))
191 pause_query(batch, hq, batch->draw);
192 q->active = false;
193 /* remove from active list: */
194 list_delinit(&hq->list);
195 }
196
197 /* helper to get ptr to specified sample: */
198 static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
199 {
200 return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
201 }
202
203 static boolean
204 fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
205 boolean wait, union pipe_query_result *result)
206 {
207 struct fd_hw_query *hq = fd_hw_query(q);
208 const struct fd_hw_sample_provider *p = hq->provider;
209 struct fd_hw_sample_period *period;
210
211 if (q->active)
212 return false;
213
214 util_query_clear_result(result, q->type);
215
216 if (LIST_IS_EMPTY(&hq->periods))
217 return true;
218
219 assert(LIST_IS_EMPTY(&hq->list));
220 assert(!hq->period);
221
222 /* if !wait, then check the last sample (the one most likely to
223 * not be ready yet) and bail if it is not ready:
224 */
225 if (!wait) {
226 int ret;
227
228 period = LIST_ENTRY(struct fd_hw_sample_period,
229 hq->periods.prev, list);
230
231 struct fd_resource *rsc = fd_resource(period->end->prsc);
232
233 if (pending(rsc, false)) {
234 /* piglit spec@arb_occlusion_query@occlusion_query_conform
235 * test, and silly apps perhaps, get stuck in a loop trying
236 * to get query result forever with wait==false.. we don't
237 * wait to flush unnecessarily but we also don't want to
238 * spin forever:
239 */
240 if (hq->no_wait_cnt++ > 5)
241 fd_batch_flush(rsc->write_batch, false);
242 return false;
243 }
244
245 if (!rsc->bo)
246 return false;
247
248 ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe,
249 DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
250 if (ret)
251 return false;
252
253 fd_bo_cpu_fini(rsc->bo);
254 }
255
256 /* sum the result across all sample periods: */
257 LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
258 struct fd_hw_sample *start = period->start;
259 struct fd_hw_sample *end = period->end;
260 unsigned i;
261
262 /* start and end samples should be from same batch: */
263 assert(start->prsc == end->prsc);
264 assert(start->num_tiles == end->num_tiles);
265
266 struct fd_resource *rsc = fd_resource(start->prsc);
267
268 if (rsc->write_batch)
269 fd_batch_flush(rsc->write_batch, true);
270
271 /* some piglit tests at least do query with no draws, I guess: */
272 if (!rsc->bo)
273 continue;
274
275 fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, DRM_FREEDRENO_PREP_READ);
276
277 void *ptr = fd_bo_map(rsc->bo);
278
279 for (i = 0; i < start->num_tiles; i++) {
280 p->accumulate_result(ctx, sampptr(period->start, i, ptr),
281 sampptr(period->end, i, ptr), result);
282 }
283
284 fd_bo_cpu_fini(rsc->bo);
285 }
286
287 return true;
288 }
289
290 static const struct fd_query_funcs hw_query_funcs = {
291 .destroy_query = fd_hw_destroy_query,
292 .begin_query = fd_hw_begin_query,
293 .end_query = fd_hw_end_query,
294 .get_query_result = fd_hw_get_query_result,
295 };
296
297 struct fd_query *
298 fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
299 {
300 struct fd_hw_query *hq;
301 struct fd_query *q;
302 int idx = pidx(query_type);
303
304 if ((idx < 0) || !ctx->sample_providers[idx])
305 return NULL;
306
307 hq = CALLOC_STRUCT(fd_hw_query);
308 if (!hq)
309 return NULL;
310
311 hq->provider = ctx->sample_providers[idx];
312
313 list_inithead(&hq->periods);
314 list_inithead(&hq->list);
315
316 q = &hq->base;
317 q->funcs = &hw_query_funcs;
318 q->type = query_type;
319
320 return q;
321 }
322
323 struct fd_hw_sample *
324 fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
325 {
326 struct fd_hw_sample *samp = util_slab_alloc(&batch->ctx->sample_pool);
327 pipe_reference_init(&samp->reference, 1);
328 samp->size = size;
329 debug_assert(util_is_power_of_two(size));
330 batch->next_sample_offset = align(batch->next_sample_offset, size);
331 samp->offset = batch->next_sample_offset;
332 /* NOTE: util_slab_alloc() does not zero out the buffer: */
333 samp->prsc = NULL;
334 samp->num_tiles = 0;
335 samp->tile_stride = 0;
336 batch->next_sample_offset += size;
337
338 if (!batch->query_buf) {
339 struct pipe_screen *pscreen = &batch->ctx->screen->base;
340 struct pipe_resource templ = {
341 .target = PIPE_BUFFER,
342 .format = PIPE_FORMAT_R8_UNORM,
343 .bind = PIPE_BIND_QUERY_BUFFER,
344 .width0 = 0, /* create initially zero size buffer */
345 .height0 = 1,
346 .depth0 = 1,
347 .array_size = 1,
348 .last_level = 0,
349 .nr_samples = 1,
350 };
351 batch->query_buf = pscreen->resource_create(pscreen, &templ);
352 }
353
354 pipe_resource_reference(&samp->prsc, batch->query_buf);
355
356 return samp;
357 }
358
359 void
360 __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
361 {
362 pipe_resource_reference(&samp->prsc, NULL);
363 util_slab_free(&ctx->sample_pool, samp);
364 }
365
366 /* called from gmem code once total storage requirements are known (ie.
367 * number of samples times number of tiles)
368 */
369 void
370 fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
371 {
372 uint32_t tile_stride = batch->next_sample_offset;
373
374 if (tile_stride > 0)
375 fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
376
377 batch->query_tile_stride = tile_stride;
378
379 while (batch->samples.size > 0) {
380 struct fd_hw_sample *samp =
381 util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
382 samp->num_tiles = num_tiles;
383 samp->tile_stride = tile_stride;
384 fd_hw_sample_reference(batch->ctx, &samp, NULL);
385 }
386
387 /* reset things for next batch: */
388 batch->next_sample_offset = 0;
389 }
390
391 void
392 fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
393 struct fd_ringbuffer *ring)
394 {
395 uint32_t tile_stride = batch->query_tile_stride;
396 uint32_t offset = tile_stride * n;
397
398 /* bail if no queries: */
399 if (tile_stride == 0)
400 return;
401
402 fd_wfi(batch->ctx, ring);
403 OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
404 OUT_RELOCW(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
405 }
406
407 void
408 fd_hw_query_set_stage(struct fd_batch *batch, struct fd_ringbuffer *ring,
409 enum fd_render_stage stage)
410 {
411 /* special case: internal blits (like mipmap level generation)
412 * go through normal draw path (via util_blitter_blit()).. but
413 * we need to ignore the FD_STAGE_DRAW which will be set, so we
414 * don't enable queries which should be paused during internal
415 * blits:
416 */
417 if ((batch->stage == FD_STAGE_BLIT) &&
418 (stage != FD_STAGE_NULL))
419 return;
420
421 if (stage != batch->stage) {
422 struct fd_hw_query *hq;
423 LIST_FOR_EACH_ENTRY(hq, &batch->ctx->active_queries, list) {
424 bool was_active = is_active(hq, batch->stage);
425 bool now_active = is_active(hq, stage);
426
427 if (now_active && !was_active)
428 resume_query(batch, hq, ring);
429 else if (was_active && !now_active)
430 pause_query(batch, hq, ring);
431 }
432 }
433 clear_sample_cache(batch);
434 batch->stage = stage;
435 }
436
437 /* call the provider->enable() for all the hw queries that were active
438 * in the current batch. This sets up perfctr selector regs statically
439 * for the duration of the batch.
440 */
441 void
442 fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
443 {
444 struct fd_context *ctx = batch->ctx;
445 for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
446 if (batch->active_providers & (1 << idx)) {
447 assert(ctx->sample_providers[idx]);
448 if (ctx->sample_providers[idx]->enable)
449 ctx->sample_providers[idx]->enable(ctx, ring);
450 }
451 }
452 batch->active_providers = 0; /* clear it for next frame */
453 }
454
455 void
456 fd_hw_query_register_provider(struct pipe_context *pctx,
457 const struct fd_hw_sample_provider *provider)
458 {
459 struct fd_context *ctx = fd_context(pctx);
460 int idx = pidx(provider->query_type);
461
462 assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
463 assert(!ctx->sample_providers[idx]);
464
465 ctx->sample_providers[idx] = provider;
466 }
467
468 void
469 fd_hw_query_init(struct pipe_context *pctx)
470 {
471 struct fd_context *ctx = fd_context(pctx);
472
473 util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
474 16, UTIL_SLAB_SINGLETHREADED);
475 util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
476 16, UTIL_SLAB_SINGLETHREADED);
477 list_inithead(&ctx->active_queries);
478 }
479
480 void
481 fd_hw_query_fini(struct pipe_context *pctx)
482 {
483 struct fd_context *ctx = fd_context(pctx);
484
485 util_slab_destroy(&ctx->sample_pool);
486 util_slab_destroy(&ctx->sample_period_pool);
487 }