/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "anv_private.h"

#include "genxml/gen8_pack.h"

#include "util/debug.h"

/** \file anv_batch_chain.c
 *
 * This file contains functions related to anv_cmd_buffer as a data
 * structure.  This involves everything required to create and destroy
 * the actual batch buffers as well as link them together and handle
 * relocations and surface state.  It specifically does *not* contain any
 * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
 */
/*-----------------------------------------------------------------------*
 * Functions related to anv_reloc_list
 *-----------------------------------------------------------------------*/

VkResult
anv_reloc_list_init(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc)
{
   memset(list, 0, sizeof(*list));
   return VK_SUCCESS;
}
static VkResult
anv_reloc_list_init_clone(struct anv_reloc_list *list,
                          const VkAllocationCallbacks *alloc,
                          const struct anv_reloc_list *other_list)
{
   list->num_relocs = other_list->num_relocs;
   list->array_length = other_list->array_length;

   if (list->num_relocs > 0) {
      list->relocs =
         vk_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (list->relocs == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      list->reloc_bos =
         vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (list->reloc_bos == NULL) {
         vk_free(alloc, list->relocs);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      memcpy(list->relocs, other_list->relocs,
             list->array_length * sizeof(*list->relocs));
      memcpy(list->reloc_bos, other_list->reloc_bos,
             list->array_length * sizeof(*list->reloc_bos));
   } else {
      list->relocs = NULL;
      list->reloc_bos = NULL;
   }

   if (other_list->deps) {
      list->deps = _mesa_set_clone(other_list->deps, NULL);
      if (!list->deps) {
         vk_free(alloc, list->relocs);
         vk_free(alloc, list->reloc_bos);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else {
      list->deps = NULL;
   }

   return VK_SUCCESS;
}
void
anv_reloc_list_finish(struct anv_reloc_list *list,
                      const VkAllocationCallbacks *alloc)
{
   vk_free(alloc, list->relocs);
   vk_free(alloc, list->reloc_bos);
   if (list->deps != NULL)
      _mesa_set_destroy(list->deps, NULL);
}
static VkResult
anv_reloc_list_grow(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc,
                    size_t num_additional_relocs)
{
   if (list->num_relocs + num_additional_relocs <= list->array_length)
      return VK_SUCCESS;

   size_t new_length = MAX2(256, list->array_length * 2);
   while (new_length < list->num_relocs + num_additional_relocs)
      new_length *= 2;

   struct drm_i915_gem_relocation_entry *new_relocs =
      vk_realloc(alloc, list->relocs,
                 new_length * sizeof(*list->relocs), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (new_relocs == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   list->relocs = new_relocs;

   struct anv_bo **new_reloc_bos =
      vk_realloc(alloc, list->reloc_bos,
                 new_length * sizeof(*list->reloc_bos), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (new_reloc_bos == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   list->reloc_bos = new_reloc_bos;

   list->array_length = new_length;

   return VK_SUCCESS;
}
VkResult
anv_reloc_list_add(struct anv_reloc_list *list,
                   const VkAllocationCallbacks *alloc,
                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
{
   struct drm_i915_gem_relocation_entry *entry;
   int index;

   if (target_bo->flags & EXEC_OBJECT_PINNED) {
      if (list->deps == NULL) {
         list->deps = _mesa_pointer_set_create(NULL);
         if (unlikely(list->deps == NULL))
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      _mesa_set_add(list->deps, target_bo);
      return VK_SUCCESS;
   }

   VkResult result = anv_reloc_list_grow(list, alloc, 1);
   if (result != VK_SUCCESS)
      return result;

   /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
   index = list->num_relocs++;
   list->reloc_bos[index] = target_bo;
   entry = &list->relocs[index];
   entry->target_handle = target_bo->gem_handle;
   entry->delta = delta;
   entry->offset = offset;
   entry->presumed_offset = target_bo->offset;
   entry->read_domains = 0;
   entry->write_domain = 0;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));

   return VK_SUCCESS;
}
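/* Editorial summary of the split above (describes the existing code, no new
 * behaviour): when the target BO is soft-pinned (EXEC_OBJECT_PINNED), no
 * drm_i915_gem_relocation_entry is recorded at all.  The BO only needs to
 * appear in the execbuf validation list, so it is tracked in the 'deps' set
 * instead and picked up later by anv_execbuf_add_bo_set().
 */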
static VkResult
anv_reloc_list_append(struct anv_reloc_list *list,
                      const VkAllocationCallbacks *alloc,
                      struct anv_reloc_list *other, uint32_t offset)
{
   VkResult result = anv_reloc_list_grow(list, alloc, other->num_relocs);
   if (result != VK_SUCCESS)
      return result;

   if (other->num_relocs > 0) {
      memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
             other->num_relocs * sizeof(other->relocs[0]));
      memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
             other->num_relocs * sizeof(other->reloc_bos[0]));

      for (uint32_t i = 0; i < other->num_relocs; i++)
         list->relocs[i + list->num_relocs].offset += offset;

      list->num_relocs += other->num_relocs;
   }

   if (other->deps) {
      if (list->deps == NULL) {
         list->deps = _mesa_pointer_set_create(NULL);
         if (unlikely(list->deps == NULL))
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      set_foreach(other->deps, entry)
         _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
   }

   return VK_SUCCESS;
}
/*-----------------------------------------------------------------------*
 * Functions related to anv_batch
 *-----------------------------------------------------------------------*/

void *
anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
{
   if (batch->next + num_dwords * 4 > batch->end) {
      VkResult result = batch->extend_cb(batch, batch->user_data);
      if (result != VK_SUCCESS) {
         anv_batch_set_error(batch, result);
         return NULL;
      }
   }

   void *p = batch->next;

   batch->next += num_dwords * 4;
   assert(batch->next <= batch->end);

   return p;
}
uint64_t
anv_batch_emit_reloc(struct anv_batch *batch,
                     void *location, struct anv_bo *bo, uint32_t delta)
{
   VkResult result = anv_reloc_list_add(batch->relocs, batch->alloc,
                                        location - batch->start, bo, delta);
   if (result != VK_SUCCESS) {
      anv_batch_set_error(batch, result);
      return 0;
   }

   return bo->offset + delta;
}
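/* Usage sketch (illustrative, derived only from the body above, not from any
 * specific caller): a caller that has just written an address field at
 * 'location' inside the batch records a relocation at batch offset
 * 'location - batch->start' targeting 'bo'.  The returned value,
 * bo->offset + delta, is the presumed address that gets written into the
 * batch until the kernel (or relocate_cmd_buffer() below) patches it.
 */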
void
anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
{
   uint32_t size, offset;

   size = other->next - other->start;
   assert(size % 4 == 0);

   if (batch->next + size > batch->end) {
      VkResult result = batch->extend_cb(batch, batch->user_data);
      if (result != VK_SUCCESS) {
         anv_batch_set_error(batch, result);
         return;
      }
   }

   assert(batch->next + size <= batch->end);

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size));
   memcpy(batch->next, other->start, size);

   offset = batch->next - batch->start;
   VkResult result = anv_reloc_list_append(batch->relocs, batch->alloc,
                                           other->relocs, offset);
   if (result != VK_SUCCESS) {
      anv_batch_set_error(batch, result);
      return;
   }

   batch->next += size;
}
/*-----------------------------------------------------------------------*
 * Functions related to anv_batch_bo
 *-----------------------------------------------------------------------*/

static VkResult
anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_batch_bo **bbo_out)
{
   VkResult result;

   struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
                                       8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (bbo == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
                              ANV_CMD_BUFFER_BATCH_SIZE);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc);
   if (result != VK_SUCCESS)
      goto fail_bo_alloc;

   *bbo_out = bbo;

   return VK_SUCCESS;

 fail_bo_alloc:
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
 fail_alloc:
   vk_free(&cmd_buffer->pool->alloc, bbo);

   return result;
}
static VkResult
anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
                   const struct anv_batch_bo *other_bbo,
                   struct anv_batch_bo **bbo_out)
{
   VkResult result;

   struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
                                       8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (bbo == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
                              other_bbo->bo.size);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc,
                                      &other_bbo->relocs);
   if (result != VK_SUCCESS)
      goto fail_bo_alloc;

   bbo->length = other_bbo->length;
   memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);

   *bbo_out = bbo;

   return VK_SUCCESS;

 fail_bo_alloc:
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
 fail_alloc:
   vk_free(&cmd_buffer->pool->alloc, bbo);

   return result;
}
static void
anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
                   size_t batch_padding)
{
   batch->next = batch->start = bbo->bo.map;
   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
   batch->relocs = &bbo->relocs;
   bbo->relocs.num_relocs = 0;
   _mesa_set_clear(bbo->relocs.deps, NULL);
}
static void
anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
                      size_t batch_padding)
{
   batch->start = bbo->bo.map;
   batch->next = bbo->bo.map + bbo->length;
   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
   batch->relocs = &bbo->relocs;
}
static void
anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
{
   assert(batch->start == bbo->bo.map);
   bbo->length = batch->next - batch->start;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
}
static VkResult
anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo,
                  struct anv_batch *batch, size_t additional,
                  size_t batch_padding)
{
   assert(batch->start == bbo->bo.map);
   bbo->length = batch->next - batch->start;

   size_t new_size = bbo->bo.size;
   while (new_size <= bbo->length + additional + batch_padding)
      new_size *= 2;

   if (new_size == bbo->bo.size)
      return VK_SUCCESS;

   struct anv_bo new_bo;
   VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
                                       &new_bo, new_size);
   if (result != VK_SUCCESS)
      return result;

   memcpy(new_bo.map, bbo->bo.map, bbo->length);

   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);

   bbo->bo = new_bo;
   anv_batch_bo_continue(bbo, batch, batch_padding);

   return VK_SUCCESS;
}
static void
anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer,
                  struct anv_batch_bo *prev_bbo,
                  struct anv_batch_bo *next_bbo,
                  uint32_t next_bbo_offset)
{
   const uint32_t bb_start_offset =
      prev_bbo->length - GEN8_MI_BATCH_BUFFER_START_length * 4;
   ASSERTED const uint32_t *bb_start = prev_bbo->bo.map + bb_start_offset;

   /* Make sure we're looking at a MI_BATCH_BUFFER_START */
   assert(((*bb_start >> 29) & 0x07) == 0);
   assert(((*bb_start >> 23) & 0x3f) == 49);

   if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
      assert(prev_bbo->bo.flags & EXEC_OBJECT_PINNED);
      assert(next_bbo->bo.flags & EXEC_OBJECT_PINNED);

      write_reloc(cmd_buffer->device,
                  prev_bbo->bo.map + bb_start_offset + 4,
                  next_bbo->bo.offset + next_bbo_offset, true);
   } else {
      uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1;
      assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4);

      prev_bbo->relocs.reloc_bos[reloc_idx] = &next_bbo->bo;
      prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset;

      /* Use a bogus presumed offset to force a relocation */
      prev_bbo->relocs.relocs[reloc_idx].presumed_offset = -1;
   }
}
static void
anv_batch_bo_destroy(struct anv_batch_bo *bbo,
                     struct anv_cmd_buffer *cmd_buffer)
{
   anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
   vk_free(&cmd_buffer->pool->alloc, bbo);
}
static VkResult
anv_batch_bo_list_clone(const struct list_head *list,
                        struct anv_cmd_buffer *cmd_buffer,
                        struct list_head *new_list)
{
   VkResult result = VK_SUCCESS;

   list_inithead(new_list);

   struct anv_batch_bo *prev_bbo = NULL;
   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
      struct anv_batch_bo *new_bbo = NULL;
      result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo);
      if (result != VK_SUCCESS)
         break;
      list_addtail(&new_bbo->link, new_list);

      if (prev_bbo)
         anv_batch_bo_link(cmd_buffer, prev_bbo, new_bbo, 0);

      prev_bbo = new_bbo;
   }

   if (result != VK_SUCCESS) {
      list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link)
         anv_batch_bo_destroy(bbo, cmd_buffer);
   }

   return result;
}
/*-----------------------------------------------------------------------*
 * Functions related to anv_cmd_buffer
 *-----------------------------------------------------------------------*/

static struct anv_batch_bo *
anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
{
   return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
}
struct anv_address
anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
   return (struct anv_address) {
      .bo = anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
      .offset = bt_block->offset,
   };
}
static void
emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
                        struct anv_bo *bo, uint32_t offset)
{
   /* In gen8+ the address field grew to two dwords to accommodate 48 bit
    * offsets.  The high 16 bits are in the last dword, so we can use the gen8
    * version in either case, as long as we set the instruction length in the
    * header accordingly.  This means that we always emit three dwords here
    * and all the padding and adjustment we do in this file works for all
    * gen versions.
    */

#define GEN7_MI_BATCH_BUFFER_START_length      2
#define GEN7_MI_BATCH_BUFFER_START_length_bias 2

   const uint32_t gen7_length =
      GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias;
   const uint32_t gen8_length =
      GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias;

   anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) {
      bbs.DWordLength               = cmd_buffer->device->info.gen < 8 ?
                                      gen7_length : gen8_length;
      bbs.SecondLevelBatchBuffer    = Firstlevelbatch;
      bbs.AddressSpaceIndicator     = ASI_PPGTT;
      bbs.BatchBufferStartAddress   = (struct anv_address) { bo, offset };
   }
}
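/* Worked example of the length arithmetic above (illustrative note, assuming
 * the usual genxml values: GEN8_MI_BATCH_BUFFER_START_length == 3 and
 * GEN7_MI_BATCH_BUFFER_START_length == 2, both with a bias of 2): on gen8+
 * DWordLength is programmed as 3 - 2 = 1 and on gen7 as 2 - 2 = 0, but the
 * packet we write is always GEN8-sized, i.e.
 * GEN8_MI_BATCH_BUFFER_START_length * 4 = 12 bytes, which is exactly the
 * padding reserved at the end of every batch_bo for chaining.
 */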
static void
cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_batch_bo *bbo)
{
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_batch_bo *current_bbo =
      anv_cmd_buffer_current_batch_bo(cmd_buffer);

   /* We set the end of the batch a little short so we would be sure we
    * have room for the chaining command.  Since we're about to emit the
    * chaining command, let's set it back where it should go.
    */
   batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
   assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);

   emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);

   anv_batch_bo_finish(current_bbo, batch);
}
static VkResult
anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
{
   struct anv_cmd_buffer *cmd_buffer = _data;
   struct anv_batch_bo *new_bbo;

   VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
   if (result != VK_SUCCESS)
      return result;

   struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos);
   if (seen_bbo == NULL) {
      anv_batch_bo_destroy(new_bbo, cmd_buffer);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }
   *seen_bbo = new_bbo;

   cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);

   list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);

   anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);

   return VK_SUCCESS;
}
static VkResult
anv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data)
{
   struct anv_cmd_buffer *cmd_buffer = _data;
   struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);

   anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096,
                     GEN8_MI_BATCH_BUFFER_START_length * 4);

   return VK_SUCCESS;
}
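/* Editorial summary of the two extend_cb strategies above: when the device
 * can chain batches, anv_cmd_buffer_chain_batch allocates a fresh batch_bo
 * and links to it with an MI_BATCH_BUFFER_START; otherwise
 * anv_cmd_buffer_grow_batch reallocates the current batch_bo into a larger
 * BO and copies the recorded contents, so the batch always stays in a
 * single buffer.
 */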
/** Allocate a binding table
 *
 * This function allocates a binding table.  This is a bit more complicated
 * than one would think due to a combination of Vulkan driver design and some
 * unfortunate hardware restrictions.
 *
 * The 3DSTATE_BINDING_TABLE_POINTERS_* packets only have a 16-bit field for
 * the binding table pointer which means that all binding tables need to live
 * in the bottom 64k of surface state base address.  The way the GL driver has
 * classically dealt with this restriction is to emit all surface states
 * on-the-fly into the batch and have a batch buffer smaller than 64k.  This
 * isn't really an option in Vulkan for a couple of reasons:
 *
 *  1) In Vulkan, we have growing (or chaining) batches so surface states have
 *     to live in their own buffer and we have to be able to re-emit
 *     STATE_BASE_ADDRESS as needed which requires a full pipeline stall.  In
 *     order to avoid emitting STATE_BASE_ADDRESS any more often than needed
 *     (it's not that hard to hit 64k of just binding tables), we allocate
 *     surface state objects up-front when VkImageView is created.  In order
 *     for this to work, surface state objects need to be allocated from a
 *     global buffer.
 *
 *  2) We tried to design the surface state system in such a way that it's
 *     already ready for bindless texturing.  The way bindless texturing works
 *     on our hardware is that you have a big pool of surface state objects
 *     (with its own state base address) and the bindless handles are simply
 *     offsets into that pool.  With the architecture we chose, we already
 *     have that pool and it's exactly the same pool that we use for regular
 *     surface states so we should already be ready for bindless.
 *
 *  3) For render targets, we need to be able to fill out the surface states
 *     later in vkBeginRenderPass so that we can assign clear colors
 *     correctly.  One way to do this would be to just create the surface
 *     state data and then repeatedly copy it into the surface state BO every
 *     time we have to re-emit STATE_BASE_ADDRESS.  While this works, it's
 *     rather annoying; it's much nicer to be able to allocate them up-front
 *     and re-use them for the entire render pass.
 *
 * While none of these are technically blockers for emitting state on the fly
 * like we do in GL, the ability to have a single surface state pool
 * simplifies things greatly.  Unfortunately, it comes at a cost...
 *
 * Because of the 64k limitation of 3DSTATE_BINDING_TABLE_POINTERS_*, we can't
 * place the binding tables just anywhere in surface state base address.
 * Because 64k isn't a whole lot of space, we can't simply restrict the
 * surface state buffer to 64k, we have to be more clever.  The solution we've
 * chosen is to have a block pool with a maximum size of 2G that starts at
 * zero and grows in both directions.  All surface states are allocated from
 * the top of the pool (positive offsets) and we allocate blocks (< 64k) of
 * binding tables from the bottom of the pool (negative offsets).  Every time
 * we allocate a new binding table block, we set surface state base address to
 * point to the bottom of the binding table block.  This way all of the
 * binding tables in the block are in the bottom 64k of surface state base
 * address.  When we fill out the binding table, we add the distance between
 * the bottom of our binding table block and zero of the block pool to the
 * surface state offsets so that they are correct relative to our new surface
 * state base address at the bottom of the binding table block.
 *
 * \see adjust_relocations_from_state_pool()
 * \see adjust_relocations_to_state_pool()
 *
 * \param[in]  entries        The number of surface state entries the binding
 *                            table should be able to hold.
 *
 * \param[out] state_offset   The offset from surface state base address
 *                            where the surface states live.  This must be
 *                            added to the surface state offset when it is
 *                            written into the binding table entry.
 *
 * \return An anv_state representing the binding table
 */
struct anv_state
anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t entries, uint32_t *state_offset)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_state_pool *state_pool = &device->surface_state_pool;
   struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
   struct anv_state state;

   state.alloc_size = align_u32(entries * 4, 32);

   if (cmd_buffer->bt_next + state.alloc_size > state_pool->block_size)
      return (struct anv_state) { 0 };

   state.offset = cmd_buffer->bt_next;
   state.map = anv_block_pool_map(&anv_binding_table_pool(device)->block_pool,
                                  bt_block->offset + state.offset);

   cmd_buffer->bt_next += state.alloc_size;

   if (device->instance->physicalDevice.use_softpin) {
      assert(bt_block->offset >= 0);
      *state_offset = device->surface_state_pool.block_pool.start_address -
         device->binding_table_pool.block_pool.start_address - bt_block->offset;
   } else {
      assert(bt_block->offset < 0);
      *state_offset = -bt_block->offset;
   }

   return state;
}
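/* Illustrative example of the state_offset math above (hypothetical numbers):
 * in the non-softpin case, binding table blocks come from the negative side
 * of the block pool, so a block at bt_block->offset == -4096 yields
 * *state_offset == 4096.  A surface state allocated at positive pool offset S
 * is then written into the binding table as S + 4096, i.e. its offset
 * relative to the surface state base address that was re-programmed to the
 * bottom of this binding table block.
 */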
struct anv_state
anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
   return anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
                                 isl_dev->ss.size, isl_dev->ss.align);
}
struct anv_state
anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t size, uint32_t alignment)
{
   return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
                                 size, alignment);
}
VkResult
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_state *bt_block = u_vector_add(&cmd_buffer->bt_block_states);
   if (bt_block == NULL) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *bt_block = anv_binding_table_pool_alloc(cmd_buffer->device);
   cmd_buffer->bt_next = 0;

   return VK_SUCCESS;
}
VkResult
anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch_bo *batch_bo;
   VkResult result;

   list_inithead(&cmd_buffer->batch_bos);

   result = anv_batch_bo_create(cmd_buffer, &batch_bo);
   if (result != VK_SUCCESS)
      return result;

   list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);

   cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
   cmd_buffer->batch.user_data = cmd_buffer;

   if (cmd_buffer->device->can_chain_batches) {
      cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
   } else {
      cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch;
   }

   anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
                      GEN8_MI_BATCH_BUFFER_START_length * 4);

   int success = u_vector_init(&cmd_buffer->seen_bbos,
                               sizeof(struct anv_bo *),
                               8 * sizeof(struct anv_bo *));
   if (!success)
      goto fail_batch_bo;

   *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = batch_bo;

   /* u_vector requires power-of-two size elements */
   unsigned pow2_state_size = util_next_power_of_two(sizeof(struct anv_state));
   success = u_vector_init(&cmd_buffer->bt_block_states,
                           pow2_state_size, 8 * pow2_state_size);
   if (!success)
      goto fail_seen_bbos;

   result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
                                &cmd_buffer->pool->alloc);
   if (result != VK_SUCCESS)
      goto fail_bt_blocks;
   cmd_buffer->last_ss_pool_center = 0;

   result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
   if (result != VK_SUCCESS)
      goto fail_bt_blocks;

   return VK_SUCCESS;

 fail_bt_blocks:
   u_vector_finish(&cmd_buffer->bt_block_states);
 fail_seen_bbos:
   u_vector_finish(&cmd_buffer->seen_bbos);
 fail_batch_bo:
   anv_batch_bo_destroy(batch_bo, cmd_buffer);

   return result;
}
void
anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_state *bt_block;
   u_vector_foreach(bt_block, &cmd_buffer->bt_block_states)
      anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
   u_vector_finish(&cmd_buffer->bt_block_states);

   anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);

   u_vector_finish(&cmd_buffer->seen_bbos);

   /* Destroy all of the batch buffers */
   list_for_each_entry_safe(struct anv_batch_bo, bbo,
                            &cmd_buffer->batch_bos, link) {
      anv_batch_bo_destroy(bbo, cmd_buffer);
   }
}
void
anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   /* Delete all but the first batch bo */
   assert(!list_is_empty(&cmd_buffer->batch_bos));
   while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
      struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
      list_del(&bbo->link);
      anv_batch_bo_destroy(bbo, cmd_buffer);
   }
   assert(!list_is_empty(&cmd_buffer->batch_bos));

   anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
                      &cmd_buffer->batch,
                      GEN8_MI_BATCH_BUFFER_START_length * 4);

   while (u_vector_length(&cmd_buffer->bt_block_states) > 1) {
      struct anv_state *bt_block = u_vector_remove(&cmd_buffer->bt_block_states);
      anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
   }
   assert(u_vector_length(&cmd_buffer->bt_block_states) == 1);
   cmd_buffer->bt_next = 0;

   cmd_buffer->surface_relocs.num_relocs = 0;
   _mesa_set_clear(cmd_buffer->surface_relocs.deps, NULL);
   cmd_buffer->last_ss_pool_center = 0;

   /* Reset the list of seen buffers */
   cmd_buffer->seen_bbos.head = 0;
   cmd_buffer->seen_bbos.tail = 0;

   *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) =
      anv_cmd_buffer_current_batch_bo(cmd_buffer);
}
void
anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      /* When we start a batch buffer, we subtract a certain amount of
       * padding from the end to ensure that we always have room to emit a
       * BATCH_BUFFER_START to chain to the next BO.  We need to remove
       * that padding before we end the batch; otherwise, we may end up
       * with our BATCH_BUFFER_END in another BO.
       */
      cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
      assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);

      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END, bbe);

      /* Round batch up to an even number of dwords. */
      if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
         anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP, noop);

      cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
   } else {
      assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
      /* If this is a secondary command buffer, we need to determine the
       * mode in which it will be executed with vkExecuteCommands.  We
       * determine this statically here so that this stays in sync with the
       * actual ExecuteCommands implementation.
       */
      const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
      if (!cmd_buffer->device->can_chain_batches) {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT;
      } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
                 (length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
         /* If the secondary has exactly one batch buffer in its list *and*
          * that batch buffer is less than half of the maximum size, we're
          * probably better off simply copying it into our batch.
          */
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
      } else if (!(cmd_buffer->usage_flags &
                   VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;

         /* In order to chain, we need this command buffer to contain an
          * MI_BATCH_BUFFER_START which will jump back to the calling batch.
          * It doesn't matter where it points now so long as it has a valid
          * relocation.  We'll adjust it later as part of the chaining
          * process.
          *
          * We set the end of the batch a little short so we would be sure we
          * have room for the chaining command.  Since we're about to emit the
          * chaining command, let's set it back where it should go.
          */
         cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
         assert(cmd_buffer->batch.start == batch_bo->bo.map);
         assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);

         emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0);
         assert(cmd_buffer->batch.start == batch_bo->bo.map);
      } else {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
      }
   }

   anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
}
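/* Editorial summary of the modes chosen above: PRIMARY batches simply get an
 * MI_BATCH_BUFFER_END.  Secondaries are classified for later use by
 * anv_cmd_buffer_add_secondary(): GROW_AND_EMIT when chaining isn't
 * supported, EMIT (copy into the primary) for small single-BO batches,
 * CHAIN when the batch can be patched to jump back into the caller, and
 * COPY_AND_CHAIN when SIMULTANEOUS_USE requires cloning the batch_bos
 * before patching.
 */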
static VkResult
anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
                             struct list_head *list)
{
   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
      struct anv_batch_bo **bbo_ptr = u_vector_add(&cmd_buffer->seen_bbos);
      if (bbo_ptr == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      *bbo_ptr = bbo;
   }

   return VK_SUCCESS;
}
void
anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                             struct anv_cmd_buffer *secondary)
{
   switch (secondary->exec_mode) {
   case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
      anv_batch_emit_batch(&primary->batch, &secondary->batch);
      break;
   case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: {
      struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary);
      unsigned length = secondary->batch.end - secondary->batch.start;
      anv_batch_bo_grow(primary, bbo, &primary->batch, length,
                        GEN8_MI_BATCH_BUFFER_START_length * 4);
      anv_batch_emit_batch(&primary->batch, &secondary->batch);
      break;
   }
   case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
      struct anv_batch_bo *first_bbo =
         list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
      struct anv_batch_bo *last_bbo =
         list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);

      emit_batch_buffer_start(primary, &first_bbo->bo, 0);

      struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
      assert(primary->batch.start == this_bbo->bo.map);
      uint32_t offset = primary->batch.next - primary->batch.start;

      /* Make the tail of the secondary point back to right after the
       * MI_BATCH_BUFFER_START in the primary batch.
       */
      anv_batch_bo_link(primary, last_bbo, this_bbo, offset);

      anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
      break;
   }
   case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
      struct list_head copy_list;
      VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
                                                secondary,
                                                &copy_list);
      if (result != VK_SUCCESS)
         return;

      anv_cmd_buffer_add_seen_bbos(primary, &copy_list);

      struct anv_batch_bo *first_bbo =
         list_first_entry(&copy_list, struct anv_batch_bo, link);
      struct anv_batch_bo *last_bbo =
         list_last_entry(&copy_list, struct anv_batch_bo, link);

      cmd_buffer_chain_to_batch_bo(primary, first_bbo);

      list_splicetail(&copy_list, &primary->batch_bos);

      anv_batch_bo_continue(last_bbo, &primary->batch,
                            GEN8_MI_BATCH_BUFFER_START_length * 4);
      break;
   }
   default:
      assert(!"Invalid execution mode");
   }

   anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc,
                         &secondary->surface_relocs, 0);
}
struct anv_execbuf {
   struct drm_i915_gem_execbuffer2           execbuf;

   struct drm_i915_gem_exec_object2 *        objects;
   uint32_t                                  bo_count;
   struct anv_bo **                          bos;

   /* Allocated length of the 'objects' and 'bos' arrays */
   uint32_t                                  array_length;

   bool                                      has_relocs;

   uint32_t                                  fence_count;
   uint32_t                                  fence_array_length;
   struct drm_i915_gem_exec_fence *          fences;
   struct anv_syncobj **                     syncobjs;
};
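/* Typical lifecycle of this helper struct, as used later in this file
 * (informal summary, not additional API surface): anv_execbuf_init() zeroes
 * it, anv_execbuf_add_bo()/anv_execbuf_add_bo_set()/anv_execbuf_add_syncobj()
 * grow the validation-object and fence arrays, the embedded
 * drm_i915_gem_execbuffer2 is filled in by the setup_* helpers and submitted
 * via anv_device_execbuf(), and anv_execbuf_finish() frees the arrays.
 */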
static void
anv_execbuf_init(struct anv_execbuf *exec)
{
   memset(exec, 0, sizeof(*exec));
}

static void
anv_execbuf_finish(struct anv_execbuf *exec,
                   const VkAllocationCallbacks *alloc)
{
   vk_free(alloc, exec->objects);
   vk_free(alloc, exec->bos);
   vk_free(alloc, exec->fences);
   vk_free(alloc, exec->syncobjs);
}
static int
_compare_bo_handles(const void *_bo1, const void *_bo2)
{
   struct anv_bo * const *bo1 = _bo1;
   struct anv_bo * const *bo2 = _bo2;

   return (*bo1)->gem_handle - (*bo2)->gem_handle;
}

static VkResult
anv_execbuf_add_bo_set(struct anv_execbuf *exec,
                       struct set *deps,
                       uint32_t extra_flags,
                       const VkAllocationCallbacks *alloc);
static VkResult
anv_execbuf_add_bo(struct anv_execbuf *exec,
                   struct anv_bo *bo,
                   struct anv_reloc_list *relocs,
                   uint32_t extra_flags,
                   const VkAllocationCallbacks *alloc)
{
   struct drm_i915_gem_exec_object2 *obj = NULL;

   if (bo->index < exec->bo_count && exec->bos[bo->index] == bo)
      obj = &exec->objects[bo->index];

   if (obj == NULL) {
      /* We've never seen this one before.  Add it to the list and assign
       * an id that we can use later.
       */
      if (exec->bo_count >= exec->array_length) {
         uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;

         struct drm_i915_gem_exec_object2 *new_objects =
            vk_alloc(alloc, new_len * sizeof(*new_objects),
                     8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (new_objects == NULL)
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

         struct anv_bo **new_bos =
            vk_alloc(alloc, new_len * sizeof(*new_bos),
                     8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (new_bos == NULL) {
            vk_free(alloc, new_objects);
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
         }

         if (exec->objects) {
            memcpy(new_objects, exec->objects,
                   exec->bo_count * sizeof(*new_objects));
            memcpy(new_bos, exec->bos,
                   exec->bo_count * sizeof(*new_bos));
         }

         vk_free(alloc, exec->objects);
         vk_free(alloc, exec->bos);

         exec->objects = new_objects;
         exec->bos = new_bos;
         exec->array_length = new_len;
      }

      assert(exec->bo_count < exec->array_length);

      bo->index = exec->bo_count++;
      obj = &exec->objects[bo->index];
      exec->bos[bo->index] = bo;

      obj->handle = bo->gem_handle;
      obj->relocation_count = 0;
      obj->relocs_ptr = 0;
      obj->alignment = 0;
      obj->offset = bo->offset;
      obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
      obj->rsvd1 = 0;
      obj->rsvd2 = 0;
   }

   if (relocs != NULL) {
      assert(obj->relocation_count == 0);

      if (relocs->num_relocs > 0) {
         /* This is the first time we've ever seen a list of relocations for
          * this BO.  Go ahead and set the relocations and then walk the list
          * of relocations and add them all.
          */
         exec->has_relocs = true;
         obj->relocation_count = relocs->num_relocs;
         obj->relocs_ptr = (uintptr_t) relocs->relocs;

         for (size_t i = 0; i < relocs->num_relocs; i++) {
            VkResult result;

            /* A quick sanity check on relocations */
            assert(relocs->relocs[i].offset < bo->size);
            result = anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL,
                                        extra_flags, alloc);
            if (result != VK_SUCCESS)
               return result;
         }
      }

      return anv_execbuf_add_bo_set(exec, relocs->deps, extra_flags, alloc);
   }

   return VK_SUCCESS;
}
/* Add BO dependencies to execbuf */
static VkResult
anv_execbuf_add_bo_set(struct anv_execbuf *exec,
                       struct set *deps,
                       uint32_t extra_flags,
                       const VkAllocationCallbacks *alloc)
{
   if (!deps || deps->entries <= 0)
      return VK_SUCCESS;

   const uint32_t entries = deps->entries;
   struct anv_bo **bos =
      vk_alloc(alloc, entries * sizeof(*bos),
               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (bos == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   struct anv_bo **bo = bos;
   set_foreach(deps, entry) {
      *bo++ = (void *)entry->key;
   }

   qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles);

   VkResult result = VK_SUCCESS;
   for (bo = bos; bo < bos + entries; bo++) {
      result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc);
      if (result != VK_SUCCESS)
         break;
   }

   vk_free(alloc, bos);

   return result;
}
static VkResult
anv_execbuf_add_syncobj(struct anv_execbuf *exec,
                        uint32_t handle, uint32_t flags,
                        const VkAllocationCallbacks *alloc)
{
   if (exec->fence_count >= exec->fence_array_length) {
      uint32_t new_len = MAX2(exec->fence_array_length * 2, 64);

      exec->fences = vk_realloc(alloc, exec->fences,
                                new_len * sizeof(*exec->fences),
                                8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (exec->fences == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      exec->fence_array_length = new_len;
   }

   exec->fences[exec->fence_count] = (struct drm_i915_gem_exec_fence) {
      .handle = handle,
      .flags = flags,
   };

   exec->fence_count++;

   return VK_SUCCESS;
}
static void
anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_reloc_list *list)
{
   for (size_t i = 0; i < list->num_relocs; i++)
      list->relocs[i].target_handle = list->reloc_bos[i]->index;
}
static void
adjust_relocations_from_state_pool(struct anv_state_pool *pool,
                                   struct anv_reloc_list *relocs,
                                   uint32_t last_pool_center_bo_offset)
{
   assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
   uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;

   for (size_t i = 0; i < relocs->num_relocs; i++) {
      /* All of the relocations from this block pool to other BO's should
       * have been emitted relative to the surface block pool center.  We
       * need to add the center offset to make them relative to the
       * beginning of the actual GEM bo.
       */
      relocs->relocs[i].offset += delta;
   }
}
static void
adjust_relocations_to_state_pool(struct anv_state_pool *pool,
                                 struct anv_bo *from_bo,
                                 struct anv_reloc_list *relocs,
                                 uint32_t last_pool_center_bo_offset)
{
   assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
   uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;

   /* When we initially emit relocations into a block pool, we don't
    * actually know what the final center_bo_offset will be so we just emit
    * it as if center_bo_offset == 0.  Now that we know what the center
    * offset is, we need to walk the list of relocations and adjust any
    * relocations that point to the pool bo with the correct offset.
    */
   for (size_t i = 0; i < relocs->num_relocs; i++) {
      if (relocs->reloc_bos[i] == pool->block_pool.bo) {
         /* Adjust the delta value in the relocation to correctly
          * correspond to the new delta.  Initially, this value may have
          * been negative (if treated as unsigned), but we trust in
          * uint32_t roll-over to fix that for us at this point.
          */
         relocs->relocs[i].delta += delta;

         /* Since the delta has changed, we need to update the actual
          * relocated value with the new presumed value.  This function
          * should only be called on batch buffers, so we know it isn't in
          * use by the GPU at the moment.
          */
         assert(relocs->relocs[i].offset < from_bo->size);
         write_reloc(pool->block_pool.device,
                     from_bo->map + relocs->relocs[i].offset,
                     relocs->relocs[i].presumed_offset +
                     relocs->relocs[i].delta, false);
      }
   }
}
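/* Worked example for the two adjustment helpers above (hypothetical numbers,
 * for illustration only): suppose the surface state block pool grew so that
 * center_bo_offset moved from 0 to 4096 since this command buffer last
 * recorded it, giving delta == 4096.  adjust_relocations_from_state_pool()
 * adds 4096 to every reloc offset so it still points at the same surface
 * state within the re-based pool BO, and adjust_relocations_to_state_pool()
 * adds 4096 to the delta of every batch reloc that targets the pool BO and
 * rewrites the mapped value so that presumed_offset + delta stays consistent.
 */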
static void
anv_reloc_list_apply(struct anv_device *device,
                     struct anv_reloc_list *list,
                     struct anv_bo *bo,
                     bool always_relocate)
{
   for (size_t i = 0; i < list->num_relocs; i++) {
      struct anv_bo *target_bo = list->reloc_bos[i];
      if (list->relocs[i].presumed_offset == target_bo->offset &&
          !always_relocate)
         continue;

      void *p = bo->map + list->relocs[i].offset;
      write_reloc(device, p, target_bo->offset + list->relocs[i].delta, true);
      list->relocs[i].presumed_offset = target_bo->offset;
   }
}
/**
 * This function applies the relocation for a command buffer and writes the
 * actual addresses into the buffers as per what we were told by the kernel on
 * the previous execbuf2 call.  This should be safe to do because, for each
 * relocated address, we have two cases:
 *
 *  1) The target BO is inactive (as seen by the kernel).  In this case, it is
 *     not in use by the GPU so updating the address is 100% ok.  It won't be
 *     in-use by the GPU (from our context) again until the next execbuf2
 *     happens.  If the kernel decides to move it in the next execbuf2, it
 *     will have to do the relocations itself, but that's ok because it should
 *     have all of the information needed to do so.
 *
 *  2) The target BO is active (as seen by the kernel).  In this case, it
 *     hasn't moved since the last execbuffer2 call because GTT shuffling
 *     *only* happens when the BO is idle.  (From our perspective, it only
 *     happens inside the execbuffer2 ioctl, but the shuffling may be
 *     triggered by another ioctl, with full-ppgtt this is limited to only
 *     execbuffer2 ioctls on the same context, or memory pressure.)  Since the
 *     target BO hasn't moved, our anv_bo::offset exactly matches the BO's GTT
 *     address and the relocated value we are writing into the BO will be the
 *     same as the value that is already there.
 *
 *     There is also a possibility that the target BO is active but the exact
 *     RENDER_SURFACE_STATE object we are writing the relocation into isn't in
 *     use.  In this case, the address currently in the RENDER_SURFACE_STATE
 *     may be stale but it's still safe to write the relocation because that
 *     particular RENDER_SURFACE_STATE object isn't in-use by the GPU and
 *     won't be until the next execbuf2 call.
 *
 * By doing relocations on the CPU, we can tell the kernel that it doesn't
 * need to bother.  We want to do this because the surface state buffer is
 * used by every command buffer so, if the kernel does the relocations, it
 * will always be busy and the kernel will always stall.  This is also
 * probably the fastest mechanism for doing relocations since the kernel would
 * have to make a full copy of all the relocations lists.
 */
static bool
relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_execbuf *exec)
{
   if (!exec->has_relocs)
      return true;

   static int userspace_relocs = -1;
   if (userspace_relocs < 0)
      userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true);
   if (!userspace_relocs)
      return false;

   /* First, we have to check to see whether or not we can even do the
    * relocation.  New buffers which have never been submitted to the kernel
    * don't have a valid offset so we need to let the kernel do relocations so
    * that we can get offsets for them.  On future execbuf2 calls, those
    * buffers will have offsets and we will be able to skip relocating.
    * Invalid offsets are indicated by anv_bo::offset == (uint64_t)-1.
    */
   for (uint32_t i = 0; i < exec->bo_count; i++) {
      if (exec->bos[i]->offset == (uint64_t)-1)
         return false;
   }

   /* Since surface states are shared between command buffers and we don't
    * know what order they will be submitted to the kernel, we don't know
    * what address is actually written in the surface state object at any
    * given time.  The only option is to always relocate them.
    */
   anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs,
                        cmd_buffer->device->surface_state_pool.block_pool.bo,
                        true /* always relocate surface states */);

   /* Since we own all of the batch buffers, we know what values are stored
    * in the relocated addresses and only have to update them if the offsets
    * have changed.
    */
   struct anv_batch_bo **bbo;
   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
      anv_reloc_list_apply(cmd_buffer->device,
                           &(*bbo)->relocs, &(*bbo)->bo, false);
   }

   for (uint32_t i = 0; i < exec->bo_count; i++)
      exec->objects[i].offset = exec->bos[i]->offset;

   return true;
}
static VkResult
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
                             struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_state_pool *ss_pool =
      &cmd_buffer->device->surface_state_pool;

   adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
                                      cmd_buffer->last_ss_pool_center);
   VkResult result;
   if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
      anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) {
         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
                                     &cmd_buffer->device->alloc);
         if (result != VK_SUCCESS)
            return result;
      }
      /* Add surface dependencies (BOs) to the execbuf */
      anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0,
                             &cmd_buffer->device->alloc);

      /* Add the BOs for all memory objects */
      list_for_each_entry(struct anv_device_memory, mem,
                          &cmd_buffer->device->memory_objects, link) {
         result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0,
                                     &cmd_buffer->device->alloc);
         if (result != VK_SUCCESS)
            return result;
      }

      struct anv_block_pool *pool;
      pool = &cmd_buffer->device->dynamic_state_pool.block_pool;
      anv_block_pool_foreach_bo(bo, pool) {
         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
                                     &cmd_buffer->device->alloc);
         if (result != VK_SUCCESS)
            return result;
      }

      pool = &cmd_buffer->device->instruction_state_pool.block_pool;
      anv_block_pool_foreach_bo(bo, pool) {
         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
                                     &cmd_buffer->device->alloc);
         if (result != VK_SUCCESS)
            return result;
      }

      pool = &cmd_buffer->device->binding_table_pool.block_pool;
      anv_block_pool_foreach_bo(bo, pool) {
         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
                                     &cmd_buffer->device->alloc);
         if (result != VK_SUCCESS)
            return result;
      }
   } else {
      /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs
       * will get added automatically by processing relocations on the batch
       * buffer.  We have to add the surface state BO manually because it has
       * relocations of its own that we need to be sure are processed.
       */
      result = anv_execbuf_add_bo(execbuf, ss_pool->block_pool.bo,
                                  &cmd_buffer->surface_relocs, 0,
                                  &cmd_buffer->device->alloc);
      if (result != VK_SUCCESS)
         return result;
   }

   /* First, we walk over all of the bos we've seen and add them and their
    * relocations to the validate list.
    */
   struct anv_batch_bo **bbo;
   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
      adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
                                       cmd_buffer->last_ss_pool_center);

      result = anv_execbuf_add_bo(execbuf, &(*bbo)->bo, &(*bbo)->relocs, 0,
                                  &cmd_buffer->device->alloc);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Now that we've adjusted all of the surface state relocations, we need to
    * record the surface state pool center so future executions of the command
    * buffer can adjust correctly.
    */
   cmd_buffer->last_ss_pool_center = ss_pool->block_pool.center_bo_offset;

   struct anv_batch_bo *first_batch_bo =
      list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);

   /* The kernel requires that the last entry in the validation list be the
    * batch buffer to execute.  We can simply swap the element
    * corresponding to the first batch_bo in the chain with the last
    * element in the list.
    */
   if (first_batch_bo->bo.index != execbuf->bo_count - 1) {
      uint32_t idx = first_batch_bo->bo.index;
      uint32_t last_idx = execbuf->bo_count - 1;

      struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
      assert(execbuf->bos[idx] == &first_batch_bo->bo);

      execbuf->objects[idx] = execbuf->objects[last_idx];
      execbuf->bos[idx] = execbuf->bos[last_idx];
      execbuf->bos[idx]->index = idx;

      execbuf->objects[last_idx] = tmp_obj;
      execbuf->bos[last_idx] = &first_batch_bo->bo;
      first_batch_bo->bo.index = last_idx;
   }

   /* If we are pinning our BOs, we shouldn't have to relocate anything */
   if (cmd_buffer->device->instance->physicalDevice.use_softpin)
      assert(!execbuf->has_relocs);

   /* Now we go through and fixup all of the relocation lists to point to
    * the correct indices in the object array.  We have to do this after we
    * reorder the list above as some of the indices may have changed.
    */
   if (execbuf->has_relocs) {
      u_vector_foreach(bbo, &cmd_buffer->seen_bbos)
         anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);

      anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
   }

   if (!cmd_buffer->device->info.has_llc) {
      __builtin_ia32_mfence();
      u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
         for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
            __builtin_ia32_clflush((*bbo)->bo.map + i);
      }
   }

   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf->objects,
      .buffer_count = execbuf->bo_count,
      .batch_start_offset = 0,
      .batch_len = batch->next - batch->start,
      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
      .rsvd1 = cmd_buffer->device->context_id,
   };

   if (relocate_cmd_buffer(cmd_buffer, execbuf)) {
      /* If we were able to successfully relocate everything, tell the kernel
       * that it can skip doing relocations.  The requirement for using
       * NO_RELOC is:
       *
       *  1) The addresses written in the objects must match the corresponding
       *     reloc.presumed_offset which in turn must match the corresponding
       *     execobject.offset.
       *
       *  2) To avoid stalling, execobject.offset should match the current
       *     address of that object within the active context.
       *
       * In order to satisfy all of the invariants that make userspace
       * relocations to be safe (see relocate_cmd_buffer()), we need to
       * further ensure that the addresses we use match those used by the
       * kernel for the most recent execbuf2.
       *
       * The kernel may still choose to do relocations anyway if something has
       * moved in the GTT.  In this case, the relocation list still needs to be
       * valid.  All relocations on the batch buffers are already valid and
       * kept up-to-date.  For surface state relocations, by applying the
       * relocations in relocate_cmd_buffer, we ensured that the address in
       * the RENDER_SURFACE_STATE matches presumed_offset, so it should be
       * safe for the kernel to relocate them as needed.
       */
      execbuf->execbuf.flags |= I915_EXEC_NO_RELOC;
   } else {
      /* In the case where we fall back to doing kernel relocations, we need
       * to ensure that the relocation list is valid.  All relocations on the
       * batch buffers are already valid and kept up-to-date.  Since surface
       * states are shared between command buffers and we don't know what
       * order they will be submitted to the kernel, we don't know what
       * address is actually written in the surface state object at any given
       * time.  The only option is to set a bogus presumed offset and let the
       * kernel relocate them.
       */
      for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++)
         cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
   }

   return VK_SUCCESS;
}
static VkResult
setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
{
   VkResult result = anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo,
                                        NULL, 0, &device->alloc);
   if (result != VK_SUCCESS)
      return result;

   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf->objects,
      .buffer_count = execbuf->bo_count,
      .batch_start_offset = 0,
      .batch_len = 8, /* GEN7_MI_BATCH_BUFFER_END and NOOP */
      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
      .rsvd1 = device->context_id,
   };

   return VK_SUCCESS;
}
VkResult
anv_cmd_buffer_execbuf(struct anv_device *device,
                       struct anv_cmd_buffer *cmd_buffer,
                       const VkSemaphore *in_semaphores,
                       uint32_t num_in_semaphores,
                       const VkSemaphore *out_semaphores,
                       uint32_t num_out_semaphores,
                       VkFence _fence)
{
   ANV_FROM_HANDLE(anv_fence, fence, _fence);
   UNUSED struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   struct anv_execbuf execbuf;
   anv_execbuf_init(&execbuf);

   int in_fence = -1;
   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < num_in_semaphores; i++) {
      ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
      struct anv_semaphore_impl *impl =
         semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
         &semaphore->temporary : &semaphore->permanent;

      switch (impl->type) {
      case ANV_SEMAPHORE_TYPE_BO:
         assert(!pdevice->has_syncobj);
         result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
                                     0, &device->alloc);
         if (result != VK_SUCCESS)
            return result;
         break;

      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
         assert(!pdevice->has_syncobj);
         if (in_fence == -1) {
            in_fence = impl->fd;
         } else {
            int merge = anv_gem_sync_file_merge(device, in_fence, impl->fd);
            if (merge == -1)
               return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);

            close(impl->fd);
            close(in_fence);
            in_fence = merge;
         }

         impl->fd = -1;
         break;

      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
                                          I915_EXEC_FENCE_WAIT,
                                          &device->alloc);
         if (result != VK_SUCCESS)
            return result;
         break;

      default:
         break;
      }
   }

   bool need_out_fence = false;
   for (uint32_t i = 0; i < num_out_semaphores; i++) {
      ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);

      /* Under most circumstances, out fences won't be temporary.  However,
       * the spec does allow it for opaque_fd.  From the Vulkan 1.0.53 spec:
       *
       *    "If the import is temporary, the implementation must restore the
       *    semaphore to its prior permanent state after submitting the next
       *    semaphore wait operation."
       *
       * The spec says nothing whatsoever about signal operations on
       * temporarily imported semaphores so it appears they are allowed.
       * There are also CTS tests that require this to work.
       */
      struct anv_semaphore_impl *impl =
         semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
         &semaphore->temporary : &semaphore->permanent;

      switch (impl->type) {
      case ANV_SEMAPHORE_TYPE_BO:
         assert(!pdevice->has_syncobj);
         result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
                                     EXEC_OBJECT_WRITE, &device->alloc);
         if (result != VK_SUCCESS)
            return result;
         break;

      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
         assert(!pdevice->has_syncobj);
         need_out_fence = true;
         break;

      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
                                          I915_EXEC_FENCE_SIGNAL,
                                          &device->alloc);
         if (result != VK_SUCCESS)
            return result;
         break;

      default:
         break;
      }
   }

   if (fence) {
      /* Under most circumstances, out fences won't be temporary.  However,
       * the spec does allow it for opaque_fd.  From the Vulkan 1.0.53 spec:
       *
       *    "If the import is temporary, the implementation must restore the
       *    semaphore to its prior permanent state after submitting the next
       *    semaphore wait operation."
       *
       * The spec says nothing whatsoever about signal operations on
       * temporarily imported semaphores so it appears they are allowed.
       * There are also CTS tests that require this to work.
       */
      struct anv_fence_impl *impl =
         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
         &fence->temporary : &fence->permanent;

      switch (impl->type) {
      case ANV_FENCE_TYPE_BO:
         assert(!pdevice->has_syncobj_wait);
         result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL,
                                     EXEC_OBJECT_WRITE, &device->alloc);
         if (result != VK_SUCCESS)
            return result;
         break;

      case ANV_FENCE_TYPE_SYNCOBJ:
         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
                                          I915_EXEC_FENCE_SIGNAL,
                                          &device->alloc);
         if (result != VK_SUCCESS)
            return result;
         break;

      default:
         unreachable("Invalid fence type");
      }
   }

   if (cmd_buffer) {
      if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
         struct anv_batch_bo **bo = u_vector_tail(&cmd_buffer->seen_bbos);

         device->cmd_buffer_being_decoded = cmd_buffer;
         gen_print_batch(&device->decoder_ctx, (*bo)->bo.map,
                         (*bo)->bo.size, (*bo)->bo.offset, false);
         device->cmd_buffer_being_decoded = NULL;
      }

      result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer);
   } else {
      result = setup_empty_execbuf(&execbuf, device);
   }

   if (result != VK_SUCCESS)
      return result;

   if (execbuf.fence_count > 0) {
      assert(device->instance->physicalDevice.has_syncobj);
      execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.execbuf.num_cliprects = execbuf.fence_count;
      execbuf.execbuf.cliprects_ptr = (uintptr_t) execbuf.fences;
   }

   if (in_fence != -1) {
      execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
      execbuf.execbuf.rsvd2 |= (uint32_t)in_fence;
   }

   if (need_out_fence)
      execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;

   result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);

   /* Execbuf does not consume the in_fence.  It's our job to close it. */
   if (in_fence != -1)
      close(in_fence);

   for (uint32_t i = 0; i < num_in_semaphores; i++) {
      ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
      /* From the Vulkan 1.0.53 spec:
       *
       *    "If the import is temporary, the implementation must restore the
       *    semaphore to its prior permanent state after submitting the next
       *    semaphore wait operation."
       *
       * This has to happen after the execbuf in case we close any syncobjs in
       * the process.
       */
      anv_semaphore_reset_temporary(device, semaphore);
   }

   if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
      assert(!pdevice->has_syncobj_wait);
      /* BO fences can't be shared, so they can't be temporary. */
      assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);

      /* Once the execbuf has returned, we need to set the fence state to
       * SUBMITTED.  We can't do this before calling execbuf because
       * anv_GetFenceStatus does take the global device lock before checking
       * the fence status.
       *
       * We set the fence state to SUBMITTED regardless of whether or not the
       * execbuf succeeds because we need to ensure that vkWaitForFences() and
       * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
       * VK_SUCCESS) in a finite amount of time even if execbuf fails.
       */
      fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
   }

   if (result == VK_SUCCESS && need_out_fence) {
      assert(!pdevice->has_syncobj_wait);
      int out_fence = execbuf.execbuf.rsvd2 >> 32;
      for (uint32_t i = 0; i < num_out_semaphores; i++) {
         ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
         /* Out fences can't have temporary state because that would imply
          * that we imported a sync file and are trying to signal it.
          */
         assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
         struct anv_semaphore_impl *impl = &semaphore->permanent;

         if (impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE) {
            assert(impl->fd == -1);
            impl->fd = dup(out_fence);
         }
      }
      close(out_fence);
   }

   anv_execbuf_finish(&execbuf, &device->alloc);

   return result;
}