freedreno/a6xx: Allocate and program tessellation buffer
[mesa.git] / src/gallium/drivers/freedreno/freedreno_batch.c
/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/list.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/u_string.h"

#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"

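/* (Re)initialize per-batch state: create the kernel submit and its
 * ringbuffers, create the fence, and reset all bookkeeping so the batch
 * can be used for a new round of rendering.
 */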
static void
batch_init(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	unsigned size = 0;

	if (ctx->screen->reorder)
		util_queue_fence_init(&batch->flush_fence);

	/* if kernel is too old to support unlimited # of cmd buffers, we
	 * have no option but to allocate large worst-case sizes so that
	 * we don't need to grow the ringbuffer.  Performance is likely to
	 * suffer, but there is no good alternative.
	 *
	 * XXX I think we can just require new enough kernel for this?
	 */
	if ((fd_device_version(ctx->screen->dev) < FD_VERSION_UNLIMITED_CMDS) ||
			(fd_mesa_debug & FD_DBG_NOGROW)) {
		size = 0x100000;
	}

	batch->submit = fd_submit_new(ctx->pipe);
	if (batch->nondraw) {
		batch->draw = fd_submit_new_ringbuffer(batch->submit, size,
				FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
	} else {
		batch->gmem = fd_submit_new_ringbuffer(batch->submit, size,
				FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
		batch->draw = fd_submit_new_ringbuffer(batch->submit, size,
				FD_RINGBUFFER_GROWABLE);

		if (ctx->screen->gpu_id < 600) {
			batch->binning = fd_submit_new_ringbuffer(batch->submit,
					size, FD_RINGBUFFER_GROWABLE);
		}
	}

	batch->in_fence_fd = -1;
	batch->fence = fd_fence_create(batch);

	batch->cleared = 0;
	batch->fast_cleared = 0;
	batch->invalidated = 0;
	batch->restore = batch->resolve = 0;
	batch->needs_flush = false;
	batch->flushed = false;
	batch->gmem_reason = 0;
	batch->num_draws = 0;
	batch->num_vertices = 0;
	batch->stage = FD_STAGE_NULL;

	fd_reset_wfi(batch);

	util_dynarray_init(&batch->draw_patches, NULL);
	util_dynarray_init(&batch->fb_read_patches, NULL);

	if (is_a2xx(ctx->screen)) {
		util_dynarray_init(&batch->shader_patches, NULL);
		util_dynarray_init(&batch->gmem_patches, NULL);
	}

	if (is_a3xx(ctx->screen))
		util_dynarray_init(&batch->rbrc_patches, NULL);

	assert(batch->resources->entries == 0);

	util_dynarray_init(&batch->samples, NULL);
}

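/* Create a new batch.  A "nondraw" batch only gets a single (draw)
 * ringbuffer, since it does not go through the gmem tiling pass.
 */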
struct fd_batch *
fd_batch_create(struct fd_context *ctx, bool nondraw)
{
	struct fd_batch *batch = CALLOC_STRUCT(fd_batch);

	if (!batch)
		return NULL;

	DBG("%p", batch);

	pipe_reference_init(&batch->reference, 1);
	batch->ctx = ctx;
	batch->nondraw = nondraw;

	batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer,
			_mesa_key_pointer_equal);

	batch_init(batch);

	return batch;
}

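/* Tear down the per-batch state allocated in batch_init(): release the
 * ringbuffers, BOs, fence and patch lists.  Used both when destroying a
 * batch and when resetting one for reuse.
 */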
static void
batch_fini(struct fd_batch *batch)
{
	DBG("%p", batch);

	pipe_resource_reference(&batch->query_buf, NULL);

	if (batch->in_fence_fd != -1)
		close(batch->in_fence_fd);

	/* in case batch wasn't flushed but fence was created: */
	fd_fence_populate(batch->fence, 0, -1);

	fd_fence_ref(&batch->fence, NULL);

	fd_ringbuffer_del(batch->draw);
	if (!batch->nondraw) {
		if (batch->binning)
			fd_ringbuffer_del(batch->binning);
		fd_ringbuffer_del(batch->gmem);
	} else {
		debug_assert(!batch->binning);
		debug_assert(!batch->gmem);
	}

	if (batch->lrz_clear) {
		fd_ringbuffer_del(batch->lrz_clear);
		batch->lrz_clear = NULL;
	}

	if (batch->tile_setup) {
		fd_ringbuffer_del(batch->tile_setup);
		batch->tile_setup = NULL;
	}

	if (batch->tile_fini) {
		fd_ringbuffer_del(batch->tile_fini);
		batch->tile_fini = NULL;
	}

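	/* Tessellation buffers (the tess factor/param BOs and the stateobj
	 * holding their addresses) are only allocated for batches that use
	 * tessellation, so only release them in that case:
	 */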
	if (batch->tessellation) {
		fd_bo_del(batch->tessfactor_bo);
		fd_bo_del(batch->tessparam_bo);
		fd_ringbuffer_del(batch->tess_addrs_constobj);
	}

	fd_submit_del(batch->submit);

	util_dynarray_fini(&batch->draw_patches);
	util_dynarray_fini(&batch->fb_read_patches);

	if (is_a2xx(batch->ctx->screen)) {
		util_dynarray_fini(&batch->shader_patches);
		util_dynarray_fini(&batch->gmem_patches);
	}

	if (is_a3xx(batch->ctx->screen))
		util_dynarray_fini(&batch->rbrc_patches);

	while (batch->samples.size > 0) {
		struct fd_hw_sample *samp =
			util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
		fd_hw_sample_reference(batch->ctx, &samp, NULL);
	}
	util_dynarray_fini(&batch->samples);

	if (batch->ctx->screen->reorder)
		util_queue_fence_destroy(&batch->flush_fence);
}

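/* Drop this batch's references to the batches it depends on, optionally
 * flushing them first:
 */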
static void
batch_flush_reset_dependencies(struct fd_batch *batch, bool flush)
{
	struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
	struct fd_batch *dep;

	foreach_batch(dep, cache, batch->dependents_mask) {
		if (flush)
			fd_batch_flush(dep, false);
		fd_batch_reference(&dep, NULL);
	}

	batch->dependents_mask = 0;
}

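/* Detach all resources tracked by this batch (caller must hold the
 * screen lock):
 */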
static void
batch_reset_resources_locked(struct fd_batch *batch)
{
	pipe_mutex_assert_locked(batch->ctx->screen->lock);

	set_foreach(batch->resources, entry) {
		struct fd_resource *rsc = (struct fd_resource *)entry->key;
		_mesa_set_remove(batch->resources, entry);
		debug_assert(rsc->batch_mask & (1 << batch->idx));
		rsc->batch_mask &= ~(1 << batch->idx);
		if (rsc->write_batch == batch)
			fd_batch_reference_locked(&rsc->write_batch, NULL);
	}
}

static void
batch_reset_resources(struct fd_batch *batch)
{
	mtx_lock(&batch->ctx->screen->lock);
	batch_reset_resources_locked(batch);
	mtx_unlock(&batch->ctx->screen->lock);
}

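/* Reset a batch back to a freshly-initialized state, discarding any
 * accumulated commands and resource tracking:
 */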
static void
batch_reset(struct fd_batch *batch)
{
	DBG("%p", batch);

	fd_batch_sync(batch);

	batch_flush_reset_dependencies(batch, false);
	batch_reset_resources(batch);

	batch_fini(batch);
	batch_init(batch);
}

void
fd_batch_reset(struct fd_batch *batch)
{
	if (batch->needs_flush)
		batch_reset(batch);
}

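/* Called (via fd_batch_reference()) when the last reference to a batch is
 * dropped.  Expects the context lock to be held by the caller.
 */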
void
__fd_batch_destroy(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;

	DBG("%p", batch);

	fd_context_assert_locked(batch->ctx);

	fd_bc_invalidate_batch(batch, true);

	batch_reset_resources_locked(batch);
	debug_assert(batch->resources->entries == 0);
	_mesa_set_destroy(batch->resources, NULL);

	fd_context_unlock(ctx);
	batch_flush_reset_dependencies(batch, false);
	debug_assert(batch->dependents_mask == 0);

	util_copy_framebuffer_state(&batch->framebuffer, NULL);
	batch_fini(batch);
	free(batch);
	fd_context_lock(ctx);
}

void
__fd_batch_describe(char* buf, const struct fd_batch *batch)
{
	sprintf(buf, "fd_batch<%u>", batch->seqno);
}

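/* Wait for a batch that was queued to the asynchronous flush queue to
 * actually be flushed.  No-op when batch reordering is disabled, since
 * flushes are then synchronous.
 */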
void
fd_batch_sync(struct fd_batch *batch)
{
	if (!batch->ctx->screen->reorder)
		return;
	util_queue_fence_wait(&batch->flush_fence);
}

static void
batch_flush_func(void *job, int id)
{
	struct fd_batch *batch = job;

	DBG("%p", batch);

	fd_gmem_render_tiles(batch);
	batch_reset_resources(batch);
}

static void
batch_cleanup_func(void *job, int id)
{
	struct fd_batch *batch = job;
	fd_batch_reference(&batch, NULL);
}

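/* The guts of flushing a batch: pause any active queries, flush dependent
 * batches, then either queue the gmem rendering to the flush queue (when
 * reordering is enabled) or perform it synchronously.
 */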
static void
batch_flush(struct fd_batch *batch)
{
	DBG("%p: needs_flush=%d", batch, batch->needs_flush);

	if (batch->flushed)
		return;

	batch->needs_flush = false;

	/* close out the draw cmds by making sure any active queries are
	 * paused:
	 */
	fd_batch_set_stage(batch, FD_STAGE_NULL);

	batch_flush_reset_dependencies(batch, true);

	batch->flushed = true;

	fd_fence_ref(&batch->ctx->last_fence, batch->fence);

	if (batch->ctx->screen->reorder) {
		struct fd_batch *tmp = NULL;
		fd_batch_reference(&tmp, batch);

		if (!util_queue_is_initialized(&batch->ctx->flush_queue))
			util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1, 0);

		util_queue_add_job(&batch->ctx->flush_queue,
				batch, &batch->flush_fence,
				batch_flush_func, batch_cleanup_func, 0);
	} else {
		fd_gmem_render_tiles(batch);
		batch_reset_resources(batch);
	}

	debug_assert(batch->reference.count > 0);

	mtx_lock(&batch->ctx->screen->lock);
	fd_bc_invalidate_batch(batch, false);
	mtx_unlock(&batch->ctx->screen->lock);
}

/* NOTE: could drop the last ref to batch
 *
 * @sync: synchronize with flush_queue, ensures batch is *actually* flushed
 *   to kernel before this returns, as opposed to just being queued to be
 *   flushed
 */
void
fd_batch_flush(struct fd_batch *batch, bool sync)
{
	struct fd_batch *tmp = NULL;
	bool newbatch = false;

	/* NOTE: we need to hold an extra ref across the body of flush,
	 * since the last ref to this batch could be dropped when cleaning
	 * up used_resources
	 */
	fd_batch_reference(&tmp, batch);

	if (batch == batch->ctx->batch) {
		batch->ctx->batch = NULL;
		newbatch = true;
	}

	batch_flush(tmp);

	if (newbatch) {
		struct fd_context *ctx = batch->ctx;
		struct fd_batch *new_batch;

		if (ctx->screen->reorder) {
			/* defer allocating new batch until one is needed for rendering
			 * to avoid unused batches for apps that create many contexts
			 */
			new_batch = NULL;
		} else {
			new_batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, false);
			util_copy_framebuffer_state(&new_batch->framebuffer, &batch->framebuffer);
		}

		fd_batch_reference(&batch, NULL);
		ctx->batch = new_batch;
		fd_context_all_dirty(ctx);
	}

	if (sync)
		fd_batch_sync(tmp);

	fd_batch_reference(&tmp, NULL);
}

/* find a batch's dependents mask, including recursive dependencies: */
static uint32_t
recursive_dependents_mask(struct fd_batch *batch)
{
	struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
	struct fd_batch *dep;
	uint32_t dependents_mask = batch->dependents_mask;

	foreach_batch(dep, cache, batch->dependents_mask)
		dependents_mask |= recursive_dependents_mask(dep);

	return dependents_mask;
}

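/* Record that 'batch' depends on 'dep', so that 'dep' gets flushed before
 * 'batch'.  Caller must hold the screen lock.
 */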
void
fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
{
	pipe_mutex_assert_locked(batch->ctx->screen->lock);

	if (batch->dependents_mask & (1 << dep->idx))
		return;

	/* a loop should not be possible */
	debug_assert(!((1 << batch->idx) & recursive_dependents_mask(dep)));

	struct fd_batch *other = NULL;
	fd_batch_reference_locked(&other, dep);
	batch->dependents_mask |= (1 << dep->idx);
	DBG("%p: added dependency on %p", batch, dep);
}

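/* Flush the batch currently writing to 'rsc'.  The screen lock is dropped
 * around the flush, since flushing needs to take it again itself.
 */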
static void
flush_write_batch(struct fd_resource *rsc)
{
	struct fd_batch *b = NULL;
	fd_batch_reference_locked(&b, rsc->write_batch);

	mtx_unlock(&b->ctx->screen->lock);
	fd_batch_flush(b, true);
	mtx_lock(&b->ctx->screen->lock);

	fd_bc_invalidate_batch(b, false);
	fd_batch_reference_locked(&b, NULL);
}

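/* Track that 'rsc' is read or written by this batch, establishing (or
 * flushing) dependencies on other batches that already use it.  Caller must
 * hold the screen lock.
 */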
void
fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, bool write)
{
	pipe_mutex_assert_locked(batch->ctx->screen->lock);

	if (rsc->stencil)
		fd_batch_resource_used(batch, rsc->stencil, write);

	DBG("%p: %s %p", batch, write ? "write" : "read", rsc);

	if (write)
		rsc->valid = true;

	/* note, invalidate write batch, to avoid further writes to rsc
	 * resulting in a write-after-read hazard.
	 */

	if (write) {
		/* if we are pending read or write by any other batch: */
		if (rsc->batch_mask & ~(1 << batch->idx)) {
			struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
			struct fd_batch *dep;

			if (rsc->write_batch && rsc->write_batch != batch)
				flush_write_batch(rsc);

			foreach_batch(dep, cache, rsc->batch_mask) {
				struct fd_batch *b = NULL;
				if (dep == batch)
					continue;
				/* note that batch_add_dep could flush and unref dep, so
				 * we need to hold a reference to keep it live for the
				 * fd_bc_invalidate_batch()
				 */
				fd_batch_reference(&b, dep);
				fd_batch_add_dep(batch, b);
				fd_bc_invalidate_batch(b, false);
				fd_batch_reference_locked(&b, NULL);
			}
		}
		fd_batch_reference_locked(&rsc->write_batch, batch);
	} else {
		/* If reading a resource pending a write, go ahead and flush the
		 * writer.  This avoids situations where we end up having to
		 * flush the current batch in _resource_used()
		 */
		if (rsc->write_batch && rsc->write_batch != batch)
			flush_write_batch(rsc);
	}

	if (rsc->batch_mask & (1 << batch->idx)) {
		debug_assert(_mesa_set_search(batch->resources, rsc));
		return;
	}

	debug_assert(!_mesa_set_search(batch->resources, rsc));

	_mesa_set_add(batch->resources, rsc);
	rsc->batch_mask |= (1 << batch->idx);
}

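/* Flush the batch early if the draw command stream is getting close to full
 * (only needed on kernels that cannot grow ringbuffers), or if flush-per-draw
 * debugging is enabled:
 */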
void
fd_batch_check_size(struct fd_batch *batch)
{
	debug_assert(!batch->flushed);

	if (unlikely(fd_mesa_debug & FD_DBG_FLUSH)) {
		fd_batch_flush(batch, true);
		return;
	}

	if (fd_device_version(batch->ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS)
		return;

	struct fd_ringbuffer *ring = batch->draw;
	if ((ring->cur - ring->start) > (ring->size/4 - 0x1000))
		fd_batch_flush(batch, true);
}

/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already
 * been one since last draw:
 */
void
fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	if (batch->needs_wfi) {
		if (batch->ctx->screen->gpu_id >= 500)
			OUT_WFI5(ring);
		else
			OUT_WFI(ring);
		batch->needs_wfi = false;
	}
}