src/gallium/drivers/iris/iris_batch.c

   1 /*
   2  * Copyright © 2017 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the
   6  * "Software"), to deal in the Software without restriction, including
   7  * without limitation the rights to use, copy, modify, merge, publish,
   8  * distribute, sublicense, and/or sell copies of the Software, and to
   9  * permit persons to whom the Software is furnished to do so, subject to
  10  * the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the
  13  * next paragraph) shall be included in all copies or substantial portions
  14  * of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 #include "iris_batch.h"
  26 #include "iris_bufmgr.h"
  27 #include "iris_context.h"
  28 #include "common/gen_decoder.h"
  29
  30 #include "drm-uapi/i915_drm.h"
  31
  32 #include "util/hash_table.h"
  33 #include "main/macros.h"
  34
  35 #include <errno.h>
  36 #include <xf86drm.h>
  37
  38 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
  39
  40 /**
  41  * Target sizes of the batch and state buffers.  We create the initial
  42  * buffers at these sizes, and flush when they're nearly full.  If we
  43  * underestimate how close we are to the end, and suddenly need more space
  44  * in the middle of a draw, we can grow the buffers, and finish the draw.
  45  * At that point, we'll be over our target size, so the next operation
  46  * should flush.  Each time we flush the batch, we recreate both buffers
  47  * at the original target size, so it doesn't grow without bound.
  48  */
  49 #define BATCH_SZ (20 * 1024)
  50 #define STATE_SZ (18 * 1024)
  51
  52 static void decode_batch(struct iris_batch *batch);
  53
  54 static void
  55 iris_batch_reset(struct iris_batch *batch);
  56
  57 UNUSED static void
  58 dump_validation_list(struct iris_batch *batch)
  59 {
  60    fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);
  61
  62    for (int i = 0; i < batch->exec_count; i++) {
  63       assert(batch->validation_list[i].handle ==
  64              batch->exec_bos[i]->gem_handle);
  65       fprintf(stderr, "[%d] = %d %s %p\n", i,
  66               batch->validation_list[i].handle,
  67               batch->exec_bos[i]->name,
  68               batch->exec_bos[i]);
  69    }
  70 }
  71
  72 static bool
  73 uint_key_compare(const void *a, const void *b)
  74 {
  75    return a == b;
  76 }
  77
  78 static uint32_t
  79 uint_key_hash(const void *key)
  80 {
  81    return (uintptr_t) key;
  82 }
  83
  84 static void
  85 init_reloc_list(struct iris_reloc_list *rlist, int count)
  86 {
  87    rlist->reloc_count = 0;
  88    rlist->reloc_array_size = count;
  89    rlist->relocs = malloc(rlist->reloc_array_size *
  90                           sizeof(struct drm_i915_gem_relocation_entry));
  91 }
  92
  93 static void
  94 create_batch_buffer(struct iris_bufmgr *bufmgr,
  95                     struct iris_batch_buffer *buf,
  96                     const char *name, unsigned size)
  97 {
  98    buf->bo = iris_bo_alloc(bufmgr, name, size, 4096);
  99    buf->bo->kflags |= EXEC_OBJECT_CAPTURE;
 100    buf->map = iris_bo_map(NULL, buf->bo, MAP_READ | MAP_WRITE);
 101    buf->map_next = buf->map;
 102 }
 103
 104 void
 105 iris_init_batch(struct iris_batch *batch,
 106                 struct iris_screen *screen,
 107                 struct pipe_debug_callback *dbg,
 108                 uint8_t ring)
 109 {
 110    batch->screen = screen;
 111    batch->dbg = dbg;
 112
 113    /* ring should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
 114    assert((ring & ~I915_EXEC_RING_MASK) == 0);
 115    assert(util_bitcount(ring) == 1);
 116    batch->ring = ring;
 117
 118    init_reloc_list(&batch->cmdbuf.relocs, 256);
 119    init_reloc_list(&batch->statebuf.relocs, 256);
 120
 121    batch->exec_count = 0;
 122    batch->exec_array_size = 100;
 123    batch->exec_bos =
 124       malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
 125    batch->validation_list =
 126       malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));
 127
 128    if (unlikely(INTEL_DEBUG)) {
 129       batch->state_sizes =
 130          _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
 131    }
 132
 133    iris_batch_reset(batch);
 134 }
 135
 136 #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
 137
 138 static unsigned
 139 add_exec_bo(struct iris_batch *batch, struct iris_bo *bo)
 140 {
 141    unsigned index = READ_ONCE(bo->index);
 142
 143    if (index < batch->exec_count && batch->exec_bos[index] == bo)
 144       return index;
 145
 146    /* May have been shared between multiple active batches */
 147    for (index = 0; index < batch->exec_count; index++) {
 148       if (batch->exec_bos[index] == bo)
 149          return index;
 150    }
 151
 152    iris_bo_reference(bo);
 153
 154    if (batch->exec_count == batch->exec_array_size) {
 155       batch->exec_array_size *= 2;
 156       batch->exec_bos =
 157          realloc(batch->exec_bos,
 158                  batch->exec_array_size * sizeof(batch->exec_bos[0]));
 159       batch->validation_list =
 160          realloc(batch->validation_list,
 161                  batch->exec_array_size * sizeof(batch->validation_list[0]));
 162    }
 163
 164    batch->validation_list[batch->exec_count] =
 165       (struct drm_i915_gem_exec_object2) {
 166          .handle = bo->gem_handle,
 167          .alignment = bo->align,
 168          .offset = bo->gtt_offset,
 169          .flags = bo->kflags,
 170       };
 171
 172    bo->index = batch->exec_count;
 173    batch->exec_bos[batch->exec_count] = bo;
 174    batch->aperture_space += bo->size;
 175
 176    return batch->exec_count++;
 177 }
 178
 179 static void
 180 iris_batch_reset(struct iris_batch *batch)
 181 {
 182    struct iris_screen *screen = batch->screen;
 183    struct iris_bufmgr *bufmgr = screen->bufmgr;
 184
 185    if (batch->last_cmd_bo != NULL) {
 186       iris_bo_unreference(batch->last_cmd_bo);
 187       batch->last_cmd_bo = NULL;
 188    }
 189    batch->last_cmd_bo = batch->cmdbuf.bo;
 190
 191    create_batch_buffer(bufmgr, &batch->cmdbuf, "command buffer", BATCH_SZ);
 192    create_batch_buffer(bufmgr, &batch->statebuf, "state buffer", STATE_SZ);
 193
 194    /* Avoid making 0 a valid state offset - otherwise the decoder will try
 195     * and decode data when we use offset 0 as a null pointer.
 196     */
 197    batch->statebuf.map_next += 1;
 198
 199    add_exec_bo(batch, batch->cmdbuf.bo);
 200    assert(batch->cmdbuf.bo->index == 0);
 201
 202    if (batch->state_sizes)
 203       _mesa_hash_table_clear(batch->state_sizes, NULL);
 204
 205    if (batch->ring == I915_EXEC_RENDER)
 206       batch->emit_state_base_address(batch);
 207 }
 208
 209 static void
 210 iris_batch_reset_and_clear_render_cache(struct iris_batch *batch)
 211 {
 212    iris_batch_reset(batch);
 213    // XXX: iris_render_cache_set_clear(batch);
 214 }
 215
 216 static void
 217 free_batch_buffer(struct iris_batch_buffer *buf)
 218 {
 219    iris_bo_unreference(buf->bo);
 220    buf->bo = NULL;
 221    buf->map = NULL;
 222    buf->map_next = NULL;
 223
 224    free(buf->relocs.relocs);
 225    buf->relocs.relocs = NULL;
 226    buf->relocs.reloc_array_size = 0;
 227 }
 228
 229 void
 230 iris_batch_free(struct iris_batch *batch)
 231 {
 232    for (int i = 0; i < batch->exec_count; i++) {
 233       iris_bo_unreference(batch->exec_bos[i]);
 234    }
 235    free(batch->exec_bos);
 236    free(batch->validation_list);
 237    free_batch_buffer(&batch->cmdbuf);
 238    free_batch_buffer(&batch->statebuf);
 239
 240    iris_bo_unreference(batch->last_cmd_bo);
 241
 242    if (batch->state_sizes)
 243       _mesa_hash_table_destroy(batch->state_sizes, NULL);
 244 }
 245
 246 /**
 247  * Finish copying the old batch/state buffer's contents to the new one
 248  * after we tried to "grow" the buffer in an earlier operation.
 249  */
 250 static void
 251 finish_growing_bos(struct iris_batch_buffer *buf)
 252 {
 253    struct iris_bo *old_bo = buf->partial_bo;
 254    if (!old_bo)
 255       return;
 256
 257    void *old_map = old_bo->map_cpu ? old_bo->map_cpu : old_bo->map_wc;
 258    memcpy(buf->map, old_map, buf->partial_bytes);
 259
 260    buf->partial_bo = NULL;
 261    buf->partial_bytes = 0;
 262
 263    iris_bo_unreference(old_bo);
 264 }
 265
 266 static unsigned
 267 buffer_bytes_used(struct iris_batch_buffer *buf)
 268 {
 269    return buf->map_next - buf->map;
 270 }
 271
 272 /**
 273  * Grow either the batch or state buffer to a new larger size.
 274  *
 275  * We can't actually grow buffers, so we allocate a new one, copy over
 276  * the existing contents, and update our lists to refer to the new one.
 277  *
 278  * Note that this is only temporary - each new batch recreates the buffers
 279  * at their original target size (BATCH_SZ or STATE_SZ).
 280  */
 281 static void
 282 grow_buffer(struct iris_batch *batch,
 283             struct iris_batch_buffer *buf,
 284             unsigned new_size)
 285 {
 286    struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
 287    struct iris_bo *bo = buf->bo;
 288
 289    perf_debug(batch->dbg, "Growing %s - ran out of space\n", bo->name);
 290
 291    if (buf->partial_bo) {
 292       /* We've already grown once, and now we need to do it again.
 293        * Finish our last grow operation so we can start a new one.
 294        * This should basically never happen.
 295        */
 296       perf_debug(batch->dbg, "Had to grow multiple times");
 297       finish_growing_bos(buf);
 298    }
 299
 300    const unsigned existing_bytes = buffer_bytes_used(buf);
 301
 302    struct iris_bo *new_bo =
 303       iris_bo_alloc(bufmgr, bo->name, new_size, bo->align);
 304
 305    buf->map = iris_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE);
 306    buf->map_next = buf->map + existing_bytes;
 307
 308    /* Try to put the new BO at the same GTT offset as the old BO (which
 309     * we're throwing away, so it doesn't need to be there).
 310     *
 311     * This guarantees that our relocations continue to work: values we've
 312     * already written into the buffer, values we're going to write into the
 313     * buffer, and the validation/relocation lists all will match.
 314     *
 315     * Also preserve kflags for EXEC_OBJECT_CAPTURE.
 316     */
 317    new_bo->gtt_offset = bo->gtt_offset;
 318    new_bo->index = bo->index;
 319    new_bo->kflags = bo->kflags;
 320
 321    /* Batch/state buffers are per-context, and if we've run out of space,
 322     * we must have actually used them before, so...they will be in the list.
 323     */
 324    assert(bo->index < batch->exec_count);
 325    assert(batch->exec_bos[bo->index] == bo);
 326
 327    /* Update the validation list to use the new BO. */
 328    batch->exec_bos[bo->index] = new_bo;
 329    batch->validation_list[bo->index].handle = new_bo->gem_handle;
 330
 331    /* Exchange the two BOs...without breaking pointers to the old BO.
 332     *
 333     * Consider this scenario:
 334     *
 335     * 1. Somebody calls iris_state_batch() to get a region of memory, and
 336     *    and then creates a iris_address pointing to iris->batch.state.bo.
 337     * 2. They then call iris_state_batch() a second time, which happens to
 338     *    grow and replace the state buffer.  They then try to emit a
 339     *    relocation to their first section of memory.
 340     *
 341     * If we replace the iris->batch.state.bo pointer at step 2, we would
 342     * break the address created in step 1.  They'd have a pointer to the
 343     * old destroyed BO.  Emitting a relocation would add this dead BO to
 344     * the validation list...causing /both/ statebuffers to be in the list,
 345     * and all kinds of disasters.
 346     *
 347     * This is not a contrived case - BLORP vertex data upload hits this.
 348     *
 349     * There are worse scenarios too.  Fences for GL sync objects reference
 350     * iris->batch.batch.bo.  If we replaced the batch pointer when growing,
 351     * we'd need to chase down every fence and update it to point to the
 352     * new BO.  Otherwise, it would refer to a "batch" that never actually
 353     * gets submitted, and would fail to trigger.
 354     *
 355     * To work around both of these issues, we transmutate the buffers in
 356     * place, making the existing struct iris_bo represent the new buffer,
 357     * and "new_bo" represent the old BO.  This is highly unusual, but it
 358     * seems like a necessary evil.
 359     *
 360     * We also defer the memcpy of the existing batch's contents.  Callers
 361     * may make multiple iris_state_batch calls, and retain pointers to the
 362     * old BO's map.  We'll perform the memcpy in finish_growing_bo() when
 363     * we finally submit the batch, at which point we've finished uploading
 364     * state, and nobody should have any old references anymore.
 365     *
 366     * To do that, we keep a reference to the old BO in grow->partial_bo,
 367     * and store the number of bytes to copy in grow->partial_bytes.  We
 368     * can monkey with the refcounts directly without atomics because these
 369     * are per-context BOs and they can only be touched by this thread.
 370     */
 371    assert(new_bo->refcount == 1);
 372    new_bo->refcount = bo->refcount;
 373    bo->refcount = 1;
 374
 375    struct iris_bo tmp;
 376    memcpy(&tmp, bo, sizeof(struct iris_bo));
 377    memcpy(bo, new_bo, sizeof(struct iris_bo));
 378    memcpy(new_bo, &tmp, sizeof(struct iris_bo));
 379
 380    buf->partial_bo = new_bo; /* the one reference of the OLD bo */
 381    buf->partial_bytes = existing_bytes;
 382 }
 383
 384 static void
 385 require_buffer_space(struct iris_batch *batch,
 386                      struct iris_batch_buffer *buf,
 387                      unsigned size,
 388                      unsigned flush_threshold,
 389                      unsigned max_buffer_size)
 390 {
 391    const unsigned required_bytes = buffer_bytes_used(buf) + size;
 392
 393    if (!batch->no_wrap && required_bytes >= flush_threshold) {
 394       iris_batch_flush(batch);
 395    } else if (required_bytes >= buf->bo->size) {
 396       grow_buffer(batch, buf,
 397                   MIN2(buf->bo->size + buf->bo->size / 2, max_buffer_size));
 398       assert(required_bytes < buf->bo->size);
 399    }
 400 }
 401
 402
 403 void
 404 iris_require_command_space(struct iris_batch *batch, unsigned size)
 405 {
 406    require_buffer_space(batch, &batch->cmdbuf, size, BATCH_SZ, MAX_BATCH_SIZE);
 407 }
 408
 409 /**
 410  * Reserve some space in the statebuffer, or flush.
 411  *
 412  * This is used to estimate when we're near the end of the batch,
 413  * so we can flush early.
 414  */
 415 void
 416 iris_require_state_space(struct iris_batch *batch, unsigned size)
 417 {
 418    require_buffer_space(batch, &batch->statebuf, size, STATE_SZ,
 419                         MAX_STATE_SIZE);
 420 }
 421
 422 void
 423 iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
 424 {
 425    iris_require_command_space(batch, size);
 426    memcpy(batch->cmdbuf.map_next, data, size);
 427    batch->cmdbuf.map_next += size;
 428 }
 429
 430 /**
 431  * Called from iris_batch_flush before emitting MI_BATCHBUFFER_END and
 432  * sending it off.
 433  *
 434  * This function can emit state (say, to preserve registers that aren't saved
 435  * between batches).
 436  */
 437 static void
 438 iris_finish_batch(struct iris_batch *batch)
 439 {
 440    batch->no_wrap = true;
 441
 442    // XXX: ISP DIS
 443
 444    /* Emit MI_BATCH_BUFFER_END to finish our batch.  Note that execbuf2
 445     * requires our batch size to be QWord aligned, so we pad it out if
 446     * necessary by emitting an extra MI_NOOP after the end.
 447     */
 448    const uint32_t MI_BATCH_BUFFER_END_AND_NOOP[2]  = { (0xA << 23), 0 };
 449    const bool qword_aligned = (buffer_bytes_used(&batch->cmdbuf) % 8) == 0;
 450    iris_batch_emit(batch, MI_BATCH_BUFFER_END_AND_NOOP, qword_aligned ? 8 : 4);
 451
 452    batch->no_wrap = false;
 453 }
 454
 455 static int
 456 submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd)
 457 {
 458    iris_bo_unmap(batch->cmdbuf.bo);
 459    iris_bo_unmap(batch->statebuf.bo);
 460
 461    /* The requirement for using I915_EXEC_NO_RELOC are:
 462     *
 463     *   The addresses written in the objects must match the corresponding
 464     *   reloc.gtt_offset which in turn must match the corresponding
 465     *   execobject.offset.
 466     *
 467     *   Any render targets written to in the batch must be flagged with
 468     *   EXEC_OBJECT_WRITE.
 469     *
 470     *   To avoid stalling, execobject.offset should match the current
 471     *   address of that object within the active context.
 472     */
 473    /* Set statebuffer relocations */
 474    const unsigned state_index = batch->statebuf.bo->index;
 475    if (state_index < batch->exec_count &&
 476        batch->exec_bos[state_index] == batch->statebuf.bo) {
 477       struct drm_i915_gem_exec_object2 *entry =
 478          &batch->validation_list[state_index];
 479       assert(entry->handle == batch->statebuf.bo->gem_handle);
 480       entry->relocation_count = batch->statebuf.relocs.reloc_count;
 481       entry->relocs_ptr = (uintptr_t) batch->statebuf.relocs.relocs;
 482    }
 483
 484    /* Set batchbuffer relocations */
 485    struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
 486    assert(entry->handle == batch->cmdbuf.bo->gem_handle);
 487    entry->relocation_count = batch->cmdbuf.relocs.reloc_count;
 488    entry->relocs_ptr = (uintptr_t) batch->cmdbuf.relocs.relocs;
 489
 490    struct drm_i915_gem_execbuffer2 execbuf = {
 491       .buffers_ptr = (uintptr_t) batch->validation_list,
 492       .buffer_count = batch->exec_count,
 493       .batch_start_offset = 0,
 494       .batch_len = buffer_bytes_used(&batch->cmdbuf),
 495       .flags = batch->ring |
 496                I915_EXEC_NO_RELOC |
 497                I915_EXEC_BATCH_FIRST |
 498                I915_EXEC_HANDLE_LUT,
 499       .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
 500    };
 501
 502    unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
 503
 504    if (in_fence_fd != -1) {
 505       execbuf.rsvd2 = in_fence_fd;
 506       execbuf.flags |= I915_EXEC_FENCE_IN;
 507    }
 508
 509    if (out_fence_fd != NULL) {
 510       cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2_WR;
 511       *out_fence_fd = -1;
 512       execbuf.flags |= I915_EXEC_FENCE_OUT;
 513    }
 514
 515 #if 1
 516    int ret = drm_ioctl(batch->screen->fd, cmd, &execbuf);
 517    if (ret != 0) {
 518       ret = -errno;
 519       DBG("execbuf FAILED: errno = %d\n", -ret);
 520    } else {
 521       DBG("execbuf succeeded\n");
 522    }
 523 #else
 524    int ret = 0;
 525    fprintf(stderr, "execbuf disabled for now\n");
 526 #endif
 527
 528    for (int i = 0; i < batch->exec_count; i++) {
 529       struct iris_bo *bo = batch->exec_bos[i];
 530
 531       bo->idle = false;
 532       bo->index = -1;
 533
 534       /* Update iris_bo::gtt_offset */
 535       if (batch->validation_list[i].offset != bo->gtt_offset) {
 536          DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%llx\n",
 537              bo->gem_handle, bo->gtt_offset,
 538              batch->validation_list[i].offset);
 539          bo->gtt_offset = batch->validation_list[i].offset;
 540       }
 541    }
 542
 543    if (ret == 0 && out_fence_fd != NULL)
 544       *out_fence_fd = execbuf.rsvd2 >> 32;
 545
 546    return ret;
 547 }
 548
 549 /**
 550  * The in_fence_fd is ignored if -1.  Otherwise this function takes ownership
 551  * of the fd.
 552  *
 553  * The out_fence_fd is ignored if NULL. Otherwise, the caller takes ownership
 554  * of the returned fd.
 555  */
 556 int
 557 _iris_batch_flush_fence(struct iris_batch *batch,
 558                         int in_fence_fd, int *out_fence_fd,
 559                         const char *file, int line)
 560 {
 561    if (buffer_bytes_used(&batch->cmdbuf) == 0)
 562       return 0;
 563
 564    /* Check that we didn't just wrap our batchbuffer at a bad time. */
 565    assert(!batch->no_wrap);
 566
 567    iris_finish_batch(batch);
 568
 569    if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
 570       int bytes_for_commands = buffer_bytes_used(&batch->cmdbuf);
 571       int bytes_for_state = buffer_bytes_used(&batch->statebuf);
 572       fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%) (pkt),"
 573               " %5db (%0.1f%%) (state), %4d BOs (%0.1fMb aperture),"
 574               " %4d batch relocs, %4d state relocs\n", file, line,
 575               bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ,
 576               bytes_for_state, 100.0f * bytes_for_state / STATE_SZ,
 577               batch->exec_count,
 578               (float) batch->aperture_space / (1024 * 1024),
 579               batch->cmdbuf.relocs.reloc_count,
 580               batch->statebuf.relocs.reloc_count);
 581    }
 582
 583    int ret = submit_batch(batch, in_fence_fd, out_fence_fd);
 584    if (ret < 0)
 585       return ret;
 586
 587    //throttle(iris);
 588
 589    if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
 590       decode_batch(batch);
 591
 592    //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
 593       //iris_check_for_reset(ice);
 594
 595    if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
 596       dbg_printf("waiting for idle\n");
 597       iris_bo_wait_rendering(batch->cmdbuf.bo);
 598    }
 599
 600    /* Clean up after the batch we submitted and prepare for a new one. */
 601    for (int i = 0; i < batch->exec_count; i++) {
 602       iris_bo_unreference(batch->exec_bos[i]);
 603       batch->exec_bos[i] = NULL;
 604    }
 605    batch->cmdbuf.relocs.reloc_count = 0;
 606    batch->statebuf.relocs.reloc_count = 0;
 607    batch->exec_count = 0;
 608    batch->aperture_space = 0;
 609
 610    iris_bo_unreference(batch->statebuf.bo);
 611
 612    /* Start a new batch buffer. */
 613    iris_batch_reset_and_clear_render_cache(batch);
 614
 615    return 0;
 616 }
 617
 618 bool
 619 iris_batch_references(struct iris_batch *batch, struct iris_bo *bo)
 620 {
 621    unsigned index = READ_ONCE(bo->index);
 622    if (index < batch->exec_count && batch->exec_bos[index] == bo)
 623       return true;
 624
 625    for (int i = 0; i < batch->exec_count; i++) {
 626       if (batch->exec_bos[i] == bo)
 627          return true;
 628    }
 629    return false;
 630 }
 631
 632 /*  This is the only way buffers get added to the validate list.
 633  */
 634 static uint64_t
 635 emit_reloc(struct iris_batch *batch,
 636            struct iris_reloc_list *rlist, uint32_t offset,
 637            struct iris_bo *target, uint32_t target_offset,
 638            unsigned int reloc_flags)
 639 {
 640    assert(target != NULL);
 641
 642    unsigned int index = add_exec_bo(batch, target);
 643    struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
 644
 645    if (target->kflags & EXEC_OBJECT_PINNED) {
 646       assert(entry->offset == target->gtt_offset);
 647       return entry->offset + target_offset;
 648    }
 649
 650    if (rlist->reloc_count == rlist->reloc_array_size) {
 651       rlist->reloc_array_size *= 2;
 652       rlist->relocs = realloc(rlist->relocs,
 653                               rlist->reloc_array_size *
 654                               sizeof(struct drm_i915_gem_relocation_entry));
 655    }
 656
 657    rlist->relocs[rlist->reloc_count++] =
 658       (struct drm_i915_gem_relocation_entry) {
 659          .offset = offset,
 660          .delta = target_offset,
 661          .target_handle = index,
 662          .presumed_offset = entry->offset,
 663       };
 664
 665    /* Using the old buffer offset, write in what the right data would be, in
 666     * case the buffer doesn't move and we can short-circuit the relocation
 667     * processing in the kernel
 668     */
 669    return entry->offset + target_offset;
 670 }
 671
 672 void
 673 iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo)
 674 {
 675    assert(bo->kflags & EXEC_OBJECT_PINNED);
 676    add_exec_bo(batch, bo);
 677 }
 678
 679 uint64_t
 680 iris_batch_reloc(struct iris_batch *batch, uint32_t batch_offset,
 681                  struct iris_bo *target, uint32_t target_offset,
 682                  unsigned int reloc_flags)
 683 {
 684    assert(batch_offset <= batch->cmdbuf.bo->size - sizeof(uint32_t));
 685
 686    return emit_reloc(batch, &batch->cmdbuf.relocs, batch_offset,
 687                      target, target_offset, reloc_flags);
 688 }
 689
 690 uint64_t
 691 iris_state_reloc(struct iris_batch *batch, uint32_t state_offset,
 692                  struct iris_bo *target, uint32_t target_offset,
 693                  unsigned int reloc_flags)
 694 {
 695    assert(state_offset <= batch->statebuf.bo->size - sizeof(uint32_t));
 696
 697    return emit_reloc(batch, &batch->statebuf.relocs, state_offset,
 698                      target, target_offset, reloc_flags);
 699 }
 700
 701
 702 static uint32_t
 703 iris_state_entry_size(struct iris_batch *batch, uint32_t offset)
 704 {
 705    struct hash_entry *entry =
 706       _mesa_hash_table_search(batch->state_sizes, (void *)(uintptr_t) offset);
 707    return entry ? (uintptr_t) entry->data : 0;
 708 }
 709
 710 /**
 711  * Allocates a block of space in the batchbuffer for indirect state.
 712  */
 713 void *
 714 iris_alloc_state(struct iris_batch *batch,
 715                  int size, int alignment,
 716                  uint32_t *out_offset)
 717 {
 718    assert(size < batch->statebuf.bo->size);
 719
 720    const unsigned existing_bytes = buffer_bytes_used(&batch->statebuf);
 721    unsigned aligned_size =
 722       ALIGN(existing_bytes, alignment) - existing_bytes + size;
 723
 724    require_buffer_space(batch, &batch->statebuf, aligned_size,
 725                         STATE_SZ, MAX_STATE_SIZE);
 726
 727    unsigned offset = ALIGN(buffer_bytes_used(&batch->statebuf), alignment);
 728
 729    if (unlikely(batch->state_sizes)) {
 730       _mesa_hash_table_insert(batch->state_sizes,
 731                               (void *) (uintptr_t) offset,
 732                               (void *) (uintptr_t) size);
 733    }
 734
 735    batch->statebuf.map_next += aligned_size;
 736
 737    *out_offset = offset;
 738    return batch->statebuf.map + offset;
 739 }
 740
 741 uint32_t
 742 iris_emit_state(struct iris_batch *batch,
 743                 const void *data,
 744                 int size, int alignment)
 745 {
 746    uint32_t out_offset;
 747    void *dest = iris_alloc_state(batch, size, alignment, &out_offset);
 748    memcpy(dest, data, size);
 749    return out_offset;
 750 }
 751
 752 static void
 753 decode_batch(struct iris_batch *batch)
 754 {
 755    // XXX: decode the batch
 756 }