src/intel/vulkan/anv_device.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <assert.h>
  25 #include <stdbool.h>
  26 #include <string.h>
  27 #include <sys/mman.h>
  28 #include <sys/sysinfo.h>
  29 #include <unistd.h>
  30 #include <fcntl.h>
  31 #include <xf86drm.h>
  32
  33 #include "anv_private.h"
  34 #include "util/strtod.h"
  35 #include "util/debug.h"
  36 #include "util/build_id.h"
  37 #include "util/mesa-sha1.h"
  38 #include "vk_util.h"
  39
  40 #include "genxml/gen7_pack.h"
  41
  /* Debug-log hook installed on the shader compiler.  It takes a printf-style
   * message and deliberately does nothing with it.
   */
  static void
  compiler_debug_log(void *data, const char *fmt, ...)
  {
     (void)data;
     (void)fmt;
  }
  45
  46 static void
  47 compiler_perf_log(void *data, const char *fmt, ...)
  48 {
  49    va_list args;
  50    va_start(args, fmt);
  51
  52    if (unlikely(INTEL_DEBUG & DEBUG_PERF))
  53       intel_logd_v(fmt, args);
  54
  55    va_end(args);
  56 }
  57
  58 static VkResult
  59 anv_compute_heap_size(int fd, uint64_t *heap_size)
  60 {
  61    uint64_t gtt_size;
  62    if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
  63                                  &gtt_size) == -1) {
  64       /* If, for whatever reason, we can't actually get the GTT size from the
  65        * kernel (too old?) fall back to the aperture size.
  66        */
  67       anv_perf_warn(NULL, NULL,
  68                     "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");
  69
  70       if (anv_gem_get_aperture(fd, &gtt_size) == -1) {
  71          return vk_errorf(NULL, NULL, VK_ERROR_INITIALIZATION_FAILED,
  72                           "failed to get aperture size: %m");
  73       }
  74    }
  75
  76    /* Query the total ram from the system */
  77    struct sysinfo info;
  78    sysinfo(&info);
  79
  80    uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;
  81
  82    /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
  83     * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
  84     */
  85    uint64_t available_ram;
  86    if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
  87       available_ram = total_ram / 2;
  88    else
  89       available_ram = total_ram * 3 / 4;
  90
  91    /* We also want to leave some padding for things we allocate in the driver,
  92     * so don't go over 3/4 of the GTT either.
  93     */
  94    uint64_t available_gtt = gtt_size * 3 / 4;
  95
  96    *heap_size = MIN2(available_ram, available_gtt);
  97
  98    return VK_SUCCESS;
  99 }
 100
 101 static VkResult
 102 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
 103 {
 104    /* The kernel query only tells us whether or not the kernel supports the
 105     * EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and not whether or not the
 106     * hardware has actual 48bit address support.
 107     */
 108    device->supports_48bit_addresses =
 109       (device->info.gen >= 8) && anv_gem_supports_48b_addresses(fd);
 110
 111    uint64_t heap_size;
 112    VkResult result = anv_compute_heap_size(fd, &heap_size);
 113    if (result != VK_SUCCESS)
 114       return result;
 115
 116    if (heap_size <= 3ull * (1ull << 30)) {
 117       /* In this case, everything fits nicely into the 32-bit address space,
 118        * so there's no need for supporting 48bit addresses on client-allocated
 119        * memory objects.
 120        */
 121       device->memory.heap_count = 1;
 122       device->memory.heaps[0] = (struct anv_memory_heap) {
 123          .size = heap_size,
 124          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 125          .supports_48bit_addresses = false,
 126       };
 127    } else {
 128       /* Not everything will fit nicely into a 32-bit address space.  In this
 129        * case we need a 64-bit heap.  Advertise a small 32-bit heap and a
 130        * larger 48-bit heap.  If we're in this case, then we have a total heap
 131        * size larger than 3GiB which most likely means they have 8 GiB of
 132        * video memory and so carving off 1 GiB for the 32-bit heap should be
 133        * reasonable.
 134        */
 135       const uint64_t heap_size_32bit = 1ull << 30;
 136       const uint64_t heap_size_48bit = heap_size - heap_size_32bit;
 137
 138       assert(device->supports_48bit_addresses);
 139
 140       device->memory.heap_count = 2;
 141       device->memory.heaps[0] = (struct anv_memory_heap) {
 142          .size = heap_size_48bit,
 143          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 144          .supports_48bit_addresses = true,
 145       };
 146       device->memory.heaps[1] = (struct anv_memory_heap) {
 147          .size = heap_size_32bit,
 148          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 149          .supports_48bit_addresses = false,
 150       };
 151    }
 152
 153    uint32_t type_count = 0;
 154    for (uint32_t heap = 0; heap < device->memory.heap_count; heap++) {
 155       uint32_t valid_buffer_usage = ~0;
 156
 157       /* There appears to be a hardware issue in the VF cache where it only
 158        * considers the bottom 32 bits of memory addresses.  If you happen to
 159        * have two vertex buffers which get placed exactly 4 GiB apart and use
 160        * them in back-to-back draw calls, you can get collisions.  In order to
 161        * solve this problem, we require vertex and index buffers be bound to
 162        * memory allocated out of the 32-bit heap.
 163        */
 164       if (device->memory.heaps[heap].supports_48bit_addresses) {
 165          valid_buffer_usage &= ~(VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
 166                                  VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
 167       }
 168
 169       if (device->info.has_llc) {
 170          /* Big core GPUs share LLC with the CPU and thus one memory type can be
 171           * both cached and coherent at the same time.
 172           */
 173          device->memory.types[type_count++] = (struct anv_memory_type) {
 174             .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
 175                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 176                              VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
 177                              VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
 178             .heapIndex = heap,
 179             .valid_buffer_usage = valid_buffer_usage,
 180          };
 181       } else {
 182          /* The spec requires that we expose a host-visible, coherent memory
 183           * type, but Atom GPUs don't share LLC. Thus we offer two memory types
 184           * to give the application a choice between cached, but not coherent and
 185           * coherent but uncached (WC though).
 186           */
 187          device->memory.types[type_count++] = (struct anv_memory_type) {
 188             .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
 189                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 190                              VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
 191             .heapIndex = heap,
 192             .valid_buffer_usage = valid_buffer_usage,
 193          };
 194          device->memory.types[type_count++] = (struct anv_memory_type) {
 195             .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
 196                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 197                              VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
 198             .heapIndex = heap,
 199             .valid_buffer_usage = valid_buffer_usage,
 200          };
 201       }
 202    }
 203    device->memory.type_count = type_count;
 204
 205    return VK_SUCCESS;
 206 }
 207
 208 static VkResult
 209 anv_physical_device_init_uuids(struct anv_physical_device *device)
 210 {
 211    const struct build_id_note *note =
 212       build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
 213    if (!note) {
 214       return vk_errorf(device->instance, device,
 215                        VK_ERROR_INITIALIZATION_FAILED,
 216                        "Failed to find build-id");
 217    }
 218
 219    unsigned build_id_len = build_id_length(note);
 220    if (build_id_len < 20) {
 221       return vk_errorf(device->instance, device,
 222                        VK_ERROR_INITIALIZATION_FAILED,
 223                        "build-id too short.  It needs to be a SHA");
 224    }
 225
 226    struct mesa_sha1 sha1_ctx;
 227    uint8_t sha1[20];
 228    STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
 229
 230    /* The pipeline cache UUID is used for determining when a pipeline cache is
 231     * invalid.  It needs both a driver build and the PCI ID of the device.
 232     */
 233    _mesa_sha1_init(&sha1_ctx);
 234    _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
 235    _mesa_sha1_update(&sha1_ctx, &device->chipset_id,
 236                      sizeof(device->chipset_id));
 237    _mesa_sha1_final(&sha1_ctx, sha1);
 238    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
 239
 240    /* The driver UUID is used for determining sharability of images and memory
 241     * between two Vulkan instances in separate processes.  People who want to
 242     * share memory need to also check the device UUID (below) so all this
 243     * needs to be is the build-id.
 244     */
 245    memcpy(device->driver_uuid, build_id_data(note), VK_UUID_SIZE);
 246
 247    /* The device UUID uniquely identifies the given device within the machine.
 248     * Since we never have more than one device, this doesn't need to be a real
 249     * UUID.  However, on the off-chance that someone tries to use this to
 250     * cache pre-tiled images or something of the like, we use the PCI ID and
 251     * some bits of ISL info to ensure that this is safe.
 252     */
 253    _mesa_sha1_init(&sha1_ctx);
 254    _mesa_sha1_update(&sha1_ctx, &device->chipset_id,
 255                      sizeof(device->chipset_id));
 256    _mesa_sha1_update(&sha1_ctx, &device->isl_dev.has_bit6_swizzling,
 257                      sizeof(device->isl_dev.has_bit6_swizzling));
 258    _mesa_sha1_final(&sha1_ctx, sha1);
 259    memcpy(device->device_uuid, sha1, VK_UUID_SIZE);
 260
 261    return VK_SUCCESS;
 262 }
 263
 264 static VkResult
 265 anv_physical_device_init(struct anv_physical_device *device,
 266                          struct anv_instance *instance,
 267                          const char *path)
 268 {
 269    VkResult result;
 270    int fd;
 271
 272    brw_process_intel_debug_variable();
 273
 274    fd = open(path, O_RDWR | O_CLOEXEC);
 275    if (fd < 0)
 276       return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
 277
 278    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
 279    device->instance = instance;
 280
 281    assert(strlen(path) < ARRAY_SIZE(device->path));
 282    strncpy(device->path, path, ARRAY_SIZE(device->path));
 283
 284    device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
 285    if (!device->chipset_id) {
 286       result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
 287       goto fail;
 288    }
 289
 290    device->name = gen_get_device_name(device->chipset_id);
 291    if (!gen_get_device_info(device->chipset_id, &device->info)) {
 292       result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
 293       goto fail;
 294    }
 295
 296    if (device->info.is_haswell) {
 297       intel_logw("Haswell Vulkan support is incomplete");
 298    } else if (device->info.gen == 7 && !device->info.is_baytrail) {
 299       intel_logw("Ivy Bridge Vulkan support is incomplete");
 300    } else if (device->info.gen == 7 && device->info.is_baytrail) {
 301       intel_logw("Bay Trail Vulkan support is incomplete");
 302    } else if (device->info.gen >= 8 && device->info.gen <= 9) {
 303       /* Broadwell, Cherryview, Skylake, Broxton, Kabylake, Coffelake is as
 304        * fully supported as anything */
 305    } else if (device->info.gen == 10) {
 306       intel_logw("Cannonlake Vulkan support is alpha");
 307    } else {
 308       result = vk_errorf(device->instance, device,
 309                          VK_ERROR_INCOMPATIBLE_DRIVER,
 310                          "Vulkan not yet supported on %s", device->name);
 311       goto fail;
 312    }
 313
 314    device->cmd_parser_version = -1;
 315    if (device->info.gen == 7) {
 316       device->cmd_parser_version =
 317          anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
 318       if (device->cmd_parser_version == -1) {
 319          result = vk_errorf(device->instance, device,
 320                             VK_ERROR_INITIALIZATION_FAILED,
 321                             "failed to get command parser version");
 322          goto fail;
 323       }
 324    }
 325
 326    if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
 327       result = vk_errorf(device->instance, device,
 328                          VK_ERROR_INITIALIZATION_FAILED,
 329                          "kernel missing gem wait");
 330       goto fail;
 331    }
 332
 333    if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
 334       result = vk_errorf(device->instance, device,
 335                          VK_ERROR_INITIALIZATION_FAILED,
 336                          "kernel missing execbuf2");
 337       goto fail;
 338    }
 339
 340    if (!device->info.has_llc &&
 341        anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
 342       result = vk_errorf(device->instance, device,
 343                          VK_ERROR_INITIALIZATION_FAILED,
 344                          "kernel missing wc mmap");
 345       goto fail;
 346    }
 347
 348    result = anv_physical_device_init_heaps(device, fd);
 349    if (result != VK_SUCCESS)
 350       goto fail;
 351
 352    device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
 353    device->has_exec_fence = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE);
 354    device->has_syncobj = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY);
 355    device->has_syncobj_wait = device->has_syncobj &&
 356                               anv_gem_supports_syncobj_wait(fd);
 357
 358    bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
 359
 360    /* GENs prior to 8 do not support EU/Subslice info */
 361    if (device->info.gen >= 8) {
 362       device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
 363       device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);
 364
 365       /* Without this information, we cannot get the right Braswell
 366        * brandstrings, and we have to use conservative numbers for GPGPU on
 367        * many platforms, but otherwise, things will just work.
 368        */
 369       if (device->subslice_total < 1 || device->eu_total < 1) {
 370          intel_logw("Kernel 4.1 required to properly query GPU properties");
 371       }
 372    } else if (device->info.gen == 7) {
 373       device->subslice_total = 1 << (device->info.gt - 1);
 374    }
 375
 376    if (device->info.is_cherryview &&
 377        device->subslice_total > 0 && device->eu_total > 0) {
 378       /* Logical CS threads = EUs per subslice * num threads per EU */
 379       uint32_t max_cs_threads =
 380          device->eu_total / device->subslice_total * device->info.num_thread_per_eu;
 381
 382       /* Fuse configurations may give more threads than expected, never less. */
 383       if (max_cs_threads > device->info.max_cs_threads)
 384          device->info.max_cs_threads = max_cs_threads;
 385    }
 386
 387    device->compiler = brw_compiler_create(NULL, &device->info);
 388    if (device->compiler == NULL) {
 389       result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 390       goto fail;
 391    }
 392    device->compiler->shader_debug_log = compiler_debug_log;
 393    device->compiler->shader_perf_log = compiler_perf_log;
 394    device->compiler->supports_pull_constants = false;
 395
 396    isl_device_init(&device->isl_dev, &device->info, swizzled);
 397
 398    result = anv_physical_device_init_uuids(device);
 399    if (result != VK_SUCCESS)
 400       goto fail;
 401
 402    result = anv_init_wsi(device);
 403    if (result != VK_SUCCESS) {
 404       ralloc_free(device->compiler);
 405       goto fail;
 406    }
 407
 408    device->local_fd = fd;
 409    return VK_SUCCESS;
 410
 411 fail:
 412    close(fd);
 413    return result;
 414 }
 415
 416 static void
 417 anv_physical_device_finish(struct anv_physical_device *device)
 418 {
 419    anv_finish_wsi(device);
 420    ralloc_free(device->compiler);
 421    close(device->local_fd);
 422 }
 423
 424 static void *
 425 default_alloc_func(void *pUserData, size_t size, size_t align,
 426                    VkSystemAllocationScope allocationScope)
 427 {
 428    return malloc(size);
 429 }
 430
 431 static void *
 432 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
 433                      size_t align, VkSystemAllocationScope allocationScope)
 434 {
 435    return realloc(pOriginal, size);
 436 }
 437
 /* Free callback of the built-in allocator: a plain free(pMemory).  The user
  * data is ignored.
  */
 static void
 default_free_func(void *pUserData, void *pMemory)
 {
    (void)pUserData;

    free(pMemory);
 }
 443
 444 static const VkAllocationCallbacks default_alloc = {
 445    .pUserData = NULL,
 446    .pfnAllocation = default_alloc_func,
 447    .pfnReallocation = default_realloc_func,
 448    .pfnFree = default_free_func,
 449 };
 450
 451 VkResult anv_CreateInstance(
 452     const VkInstanceCreateInfo*                 pCreateInfo,
 453     const VkAllocationCallbacks*                pAllocator,
 454     VkInstance*                                 pInstance)
 455 {
 456    struct anv_instance *instance;
 457
 458    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
 459
 460    /* Check if user passed a debug report callback to be used during
 461     * Create/Destroy of instance.
 462     */
 463    const VkDebugReportCallbackCreateInfoEXT *ctor_cb =
 464       vk_find_struct_const(pCreateInfo->pNext,
 465                            DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT);
 466
 467    uint32_t client_version;
 468    if (pCreateInfo->pApplicationInfo &&
 469        pCreateInfo->pApplicationInfo->apiVersion != 0) {
 470       client_version = pCreateInfo->pApplicationInfo->apiVersion;
 471    } else {
 472       client_version = VK_MAKE_VERSION(1, 0, 0);
 473    }
 474
 475    if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
 476        client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
 477
 478       if (ctor_cb && ctor_cb->flags & VK_DEBUG_REPORT_ERROR_BIT_EXT)
 479          ctor_cb->pfnCallback(VK_DEBUG_REPORT_ERROR_BIT_EXT,
 480                               VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT,
 481                               VK_NULL_HANDLE, /* No handle available yet. */
 482                               __LINE__,
 483                               0,
 484                               "anv",
 485                               "incompatible driver version",
 486                               ctor_cb->pUserData);
 487
 488       return vk_errorf(NULL, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
 489                        "Client requested version %d.%d.%d",
 490                        VK_VERSION_MAJOR(client_version),
 491                        VK_VERSION_MINOR(client_version),
 492                        VK_VERSION_PATCH(client_version));
 493    }
 494
 495    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
 496       const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
 497       if (!anv_instance_extension_supported(ext_name))
 498          return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
 499    }
 500
 501    instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
 502                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
 503    if (!instance)
 504       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 505
 506    instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
 507
 508    if (pAllocator)
 509       instance->alloc = *pAllocator;
 510    else
 511       instance->alloc = default_alloc;
 512
 513    instance->apiVersion = client_version;
 514    instance->physicalDeviceCount = -1;
 515
 516    if (pthread_mutex_init(&instance->callbacks_mutex, NULL) != 0) {
 517       vk_free2(&default_alloc, pAllocator, instance);
 518       return vk_error(VK_ERROR_INITIALIZATION_FAILED);
 519    }
 520
 521    list_inithead(&instance->callbacks);
 522
 523    /* Store report debug callback to be used during DestroyInstance. */
 524    if (ctor_cb) {
 525       instance->destroy_debug_cb.flags = ctor_cb->flags;
 526       instance->destroy_debug_cb.callback = ctor_cb->pfnCallback;
 527       instance->destroy_debug_cb.data = ctor_cb->pUserData;
 528    }
 529
 530    _mesa_locale_init();
 531
 532    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
 533
 534    *pInstance = anv_instance_to_handle(instance);
 535
 536    return VK_SUCCESS;
 537 }
 538
 539 void anv_DestroyInstance(
 540     VkInstance                                  _instance,
 541     const VkAllocationCallbacks*                pAllocator)
 542 {
 543    ANV_FROM_HANDLE(anv_instance, instance, _instance);
 544
 545    if (!instance)
 546       return;
 547
 548    if (instance->physicalDeviceCount > 0) {
 549       /* We support at most one physical device. */
 550       assert(instance->physicalDeviceCount == 1);
 551       anv_physical_device_finish(&instance->physicalDevice);
 552    }
 553
 554    VG(VALGRIND_DESTROY_MEMPOOL(instance));
 555
 556    pthread_mutex_destroy(&instance->callbacks_mutex);
 557
 558    _mesa_locale_fini();
 559
 560    vk_free(&instance->alloc, instance);
 561 }
 562
 563 static VkResult
 564 anv_enumerate_devices(struct anv_instance *instance)
 565 {
 566    /* TODO: Check for more devices ? */
 567    drmDevicePtr devices[8];
 568    VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
 569    int max_devices;
 570
 571    instance->physicalDeviceCount = 0;
 572
 573    max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
 574    if (max_devices < 1)
 575       return VK_ERROR_INCOMPATIBLE_DRIVER;
 576
 577    for (unsigned i = 0; i < (unsigned)max_devices; i++) {
 578       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
 579           devices[i]->bustype == DRM_BUS_PCI &&
 580           devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
 581
 582          result = anv_physical_device_init(&instance->physicalDevice,
 583                         instance,
 584                         devices[i]->nodes[DRM_NODE_RENDER]);
 585          if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
 586             break;
 587       }
 588    }
 589    drmFreeDevices(devices, max_devices);
 590
 591    if (result == VK_SUCCESS)
 592       instance->physicalDeviceCount = 1;
 593
 594    return result;
 595 }
 596
 597
 598 VkResult anv_EnumeratePhysicalDevices(
 599     VkInstance                                  _instance,
 600     uint32_t*                                   pPhysicalDeviceCount,
 601     VkPhysicalDevice*                           pPhysicalDevices)
 602 {
 603    ANV_FROM_HANDLE(anv_instance, instance, _instance);
 604    VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
 605    VkResult result;
 606
 607    if (instance->physicalDeviceCount < 0) {
 608       result = anv_enumerate_devices(instance);
 609       if (result != VK_SUCCESS &&
 610           result != VK_ERROR_INCOMPATIBLE_DRIVER)
 611          return result;
 612    }
 613
 614    if (instance->physicalDeviceCount > 0) {
 615       assert(instance->physicalDeviceCount == 1);
 616       vk_outarray_append(&out, i) {
 617          *i = anv_physical_device_to_handle(&instance->physicalDevice);
 618       }
 619    }
 620
 621    return vk_outarray_status(&out);
 622 }
 623
 624 void anv_GetPhysicalDeviceFeatures(
 625     VkPhysicalDevice                            physicalDevice,
 626     VkPhysicalDeviceFeatures*                   pFeatures)
 627 {
 628    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
 629
 630    *pFeatures = (VkPhysicalDeviceFeatures) {
 631       .robustBufferAccess                       = true,
 632       .fullDrawIndexUint32                      = true,
 633       .imageCubeArray                           = true,
 634       .independentBlend                         = true,
 635       .geometryShader                           = true,
 636       .tessellationShader                       = true,
 637       .sampleRateShading                        = true,
 638       .dualSrcBlend                             = true,
 639       .logicOp                                  = true,
 640       .multiDrawIndirect                        = true,
 641       .drawIndirectFirstInstance                = true,
 642       .depthClamp                               = true,
 643       .depthBiasClamp                           = true,
 644       .fillModeNonSolid                         = true,
 645       .depthBounds                              = false,
 646       .wideLines                                = true,
 647       .largePoints                              = true,
 648       .alphaToOne                               = true,
 649       .multiViewport                            = true,
 650       .samplerAnisotropy                        = true,
 651       .textureCompressionETC2                   = pdevice->info.gen >= 8 ||
 652                                                   pdevice->info.is_baytrail,
 653       .textureCompressionASTC_LDR               = pdevice->info.gen >= 9, /* FINISHME CHV */
 654       .textureCompressionBC                     = true,
 655       .occlusionQueryPrecise                    = true,
 656       .pipelineStatisticsQuery                  = true,
 657       .fragmentStoresAndAtomics                 = true,
 658       .shaderTessellationAndGeometryPointSize   = true,
 659       .shaderImageGatherExtended                = true,
 660       .shaderStorageImageExtendedFormats        = true,
 661       .shaderStorageImageMultisample            = false,
 662       .shaderStorageImageReadWithoutFormat      = false,
 663       .shaderStorageImageWriteWithoutFormat     = true,
 664       .shaderUniformBufferArrayDynamicIndexing  = true,
 665       .shaderSampledImageArrayDynamicIndexing   = true,
 666       .shaderStorageBufferArrayDynamicIndexing  = true,
 667       .shaderStorageImageArrayDynamicIndexing   = true,
 668       .shaderClipDistance                       = true,
 669       .shaderCullDistance                       = true,
 670       .shaderFloat64                            = pdevice->info.gen >= 8,
 671       .shaderInt64                              = pdevice->info.gen >= 8,
 672       .shaderInt16                              = false,
 673       .shaderResourceMinLod                     = false,
 674       .variableMultisampleRate                  = false,
 675       .inheritedQueries                         = true,
 676    };
 677
 678    /* We can't do image stores in vec4 shaders */
 679    pFeatures->vertexPipelineStoresAndAtomics =
 680       pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
 681       pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
 682 }
 683
 684 void anv_GetPhysicalDeviceFeatures2KHR(
 685     VkPhysicalDevice                            physicalDevice,
 686     VkPhysicalDeviceFeatures2KHR*               pFeatures)
 687 {
 688    anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
 689
 690    vk_foreach_struct(ext, pFeatures->pNext) {
 691       switch (ext->sType) {
 692       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
 693          VkPhysicalDeviceMultiviewFeaturesKHX *features =
 694             (VkPhysicalDeviceMultiviewFeaturesKHX *)ext;
 695          features->multiview = true;
 696          features->multiviewGeometryShader = true;
 697          features->multiviewTessellationShader = true;
 698          break;
 699       }
 700
 701       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
 702          VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
 703          features->variablePointersStorageBuffer = true;
 704          features->variablePointers = false;
 705          break;
 706       }
 707
 708       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES_KHR: {
 709          VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR *features =
 710             (VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR *) ext;
 711          features->samplerYcbcrConversion = true;
 712          break;
 713       }
 714
 715       default:
 716          anv_debug_ignored_stype(ext->sType);
 717          break;
 718       }
 719    }
 720 }
 721
 722 void anv_GetPhysicalDeviceProperties(
 723     VkPhysicalDevice                            physicalDevice,
 724     VkPhysicalDeviceProperties*                 pProperties)
 725 {
 726    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
 727    const struct gen_device_info *devinfo = &pdevice->info;
 728
 729    /* See assertions made when programming the buffer surface state. */
 730    const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
 731                                       (1ul << 30) : (1ul << 27);
 732
 733    const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
 734                                  128 : 16;
 735
 736    VkSampleCountFlags sample_counts =
 737       isl_device_get_sample_counts(&pdevice->isl_dev);
 738
 739    VkPhysicalDeviceLimits limits = {
 740       .maxImageDimension1D                      = (1 << 14),
 741       .maxImageDimension2D                      = (1 << 14),
 742       .maxImageDimension3D                      = (1 << 11),
 743       .maxImageDimensionCube                    = (1 << 14),
 744       .maxImageArrayLayers                      = (1 << 11),
 745       .maxTexelBufferElements                   = 128 * 1024 * 1024,
 746       .maxUniformBufferRange                    = (1ul << 27),
 747       .maxStorageBufferRange                    = max_raw_buffer_sz,
 748       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
 749       .maxMemoryAllocationCount                 = UINT32_MAX,
 750       .maxSamplerAllocationCount                = 64 * 1024,
 751       .bufferImageGranularity                   = 64, /* A cache line */
 752       .sparseAddressSpaceSize                   = 0,
 753       .maxBoundDescriptorSets                   = MAX_SETS,
 754       .maxPerStageDescriptorSamplers            = max_samplers,
 755       .maxPerStageDescriptorUniformBuffers      = 64,
 756       .maxPerStageDescriptorStorageBuffers      = 64,
 757       .maxPerStageDescriptorSampledImages       = max_samplers,
 758       .maxPerStageDescriptorStorageImages       = 64,
 759       .maxPerStageDescriptorInputAttachments    = 64,
 760       .maxPerStageResources                     = 250,
 761       .maxDescriptorSetSamplers                 = 256,
 762       .maxDescriptorSetUniformBuffers           = 256,
 763       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
 764       .maxDescriptorSetStorageBuffers           = 256,
 765       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
 766       .maxDescriptorSetSampledImages            = 256,
 767       .maxDescriptorSetStorageImages            = 256,
 768       .maxDescriptorSetInputAttachments         = 256,
 769       .maxVertexInputAttributes                 = MAX_VBS,
 770       .maxVertexInputBindings                   = MAX_VBS,
 771       .maxVertexInputAttributeOffset            = 2047,
 772       .maxVertexInputBindingStride              = 2048,
 773       .maxVertexOutputComponents                = 128,
 774       .maxTessellationGenerationLevel           = 64,
 775       .maxTessellationPatchSize                 = 32,
 776       .maxTessellationControlPerVertexInputComponents = 128,
 777       .maxTessellationControlPerVertexOutputComponents = 128,
 778       .maxTessellationControlPerPatchOutputComponents = 128,
 779       .maxTessellationControlTotalOutputComponents = 2048,
 780       .maxTessellationEvaluationInputComponents = 128,
 781       .maxTessellationEvaluationOutputComponents = 128,
 782       .maxGeometryShaderInvocations             = 32,
 783       .maxGeometryInputComponents               = 64,
 784       .maxGeometryOutputComponents              = 128,
 785       .maxGeometryOutputVertices                = 256,
 786       .maxGeometryTotalOutputComponents         = 1024,
 787       .maxFragmentInputComponents               = 128,
 788       .maxFragmentOutputAttachments             = 8,
 789       .maxFragmentDualSrcAttachments            = 1,
 790       .maxFragmentCombinedOutputResources       = 8,
 791       .maxComputeSharedMemorySize               = 32768,
 792       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
 793       .maxComputeWorkGroupInvocations           = 16 * devinfo->max_cs_threads,
 794       .maxComputeWorkGroupSize = {
 795          16 * devinfo->max_cs_threads,
 796          16 * devinfo->max_cs_threads,
 797          16 * devinfo->max_cs_threads,
 798       },
 799       .subPixelPrecisionBits                    = 4 /* FIXME */,
 800       .subTexelPrecisionBits                    = 4 /* FIXME */,
 801       .mipmapPrecisionBits                      = 4 /* FIXME */,
 802       .maxDrawIndexedIndexValue                 = UINT32_MAX,
 803       .maxDrawIndirectCount                     = UINT32_MAX,
 804       .maxSamplerLodBias                        = 16,
 805       .maxSamplerAnisotropy                     = 16,
 806       .maxViewports                             = MAX_VIEWPORTS,
 807       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
 808       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
 809       .viewportSubPixelBits                     = 13, /* We take a float? */
 810       .minMemoryMapAlignment                    = 4096, /* A page */
 811       .minTexelBufferOffsetAlignment            = 1,
 812       .minUniformBufferOffsetAlignment          = 16,
 813       .minStorageBufferOffsetAlignment          = 4,
 814       .minTexelOffset                           = -8,
 815       .maxTexelOffset                           = 7,
 816       .minTexelGatherOffset                     = -32,
 817       .maxTexelGatherOffset                     = 31,
 818       .minInterpolationOffset                   = -0.5,
 819       .maxInterpolationOffset                   = 0.4375,
 820       .subPixelInterpolationOffsetBits          = 4,
 821       .maxFramebufferWidth                      = (1 << 14),
 822       .maxFramebufferHeight                     = (1 << 14),
 823       .maxFramebufferLayers                     = (1 << 11),
 824       .framebufferColorSampleCounts             = sample_counts,
 825       .framebufferDepthSampleCounts             = sample_counts,
 826       .framebufferStencilSampleCounts           = sample_counts,
 827       .framebufferNoAttachmentsSampleCounts     = sample_counts,
 828       .maxColorAttachments                      = MAX_RTS,
 829       .sampledImageColorSampleCounts            = sample_counts,
 830       .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
 831       .sampledImageDepthSampleCounts            = sample_counts,
 832       .sampledImageStencilSampleCounts          = sample_counts,
 833       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
 834       .maxSampleMaskWords                       = 1,
 835       .timestampComputeAndGraphics              = false,
 836       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
 837       .maxClipDistances                         = 8,
 838       .maxCullDistances                         = 8,
 839       .maxCombinedClipAndCullDistances          = 8,
 840       .discreteQueuePriorities                  = 1,
 841       .pointSizeRange                           = { 0.125, 255.875 },
 842       .lineWidthRange                           = { 0.0, 7.9921875 },
 843       .pointSizeGranularity                     = (1.0 / 8.0),
 844       .lineWidthGranularity                     = (1.0 / 128.0),
 845       .strictLines                              = false, /* FINISHME */
 846       .standardSampleLocations                  = true,
 847       .optimalBufferCopyOffsetAlignment         = 128,
 848       .optimalBufferCopyRowPitchAlignment       = 128,
 849       .nonCoherentAtomSize                      = 64,
 850    };
 851
 852    *pProperties = (VkPhysicalDeviceProperties) {
 853       .apiVersion = anv_physical_device_api_version(pdevice),
 854       .driverVersion = vk_get_driver_version(),
 855       .vendorID = 0x8086,
 856       .deviceID = pdevice->chipset_id,
 857       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
 858       .limits = limits,
 859       .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
 860    };
 861
 862    snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
 863             "%s", pdevice->name);
 864    memcpy(pProperties->pipelineCacheUUID,
 865           pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
 866 }
 867
 868 void anv_GetPhysicalDeviceProperties2KHR(
 869     VkPhysicalDevice                            physicalDevice,
 870     VkPhysicalDeviceProperties2KHR*             pProperties)
 871 {
 872    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
 873
 874    anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
 875
 876    vk_foreach_struct(ext, pProperties->pNext) {
 877       switch (ext->sType) {
 878       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
 879          VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
 880             (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
 881
 882          properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
 883          break;
 884       }
 885
 886       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
 887          VkPhysicalDeviceIDPropertiesKHR *id_props =
 888             (VkPhysicalDeviceIDPropertiesKHR *)ext;
 889          memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
 890          memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
 891          /* The LUID is for Windows. */
 892          id_props->deviceLUIDValid = false;
 893          break;
 894       }
 895
 896       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
 897          VkPhysicalDeviceMultiviewPropertiesKHX *properties =
 898             (VkPhysicalDeviceMultiviewPropertiesKHX *)ext;
 899          properties->maxMultiviewViewCount = 16;
 900          properties->maxMultiviewInstanceIndex = UINT32_MAX / 16;
 901          break;
 902       }
 903
 904       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
 905          VkPhysicalDevicePointClippingPropertiesKHR *properties =
 906             (VkPhysicalDevicePointClippingPropertiesKHR *) ext;
 907          properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
 908          anv_finishme("Implement pop-free point clipping");
 909          break;
 910       }
 911
 912       default:
 913          anv_debug_ignored_stype(ext->sType);
 914          break;
 915       }
 916    }
 917 }
 918
 919 /* We support exactly one queue family. */
 920 static const VkQueueFamilyProperties
 921 anv_queue_family_properties = {
 922    .queueFlags = VK_QUEUE_GRAPHICS_BIT |
 923                  VK_QUEUE_COMPUTE_BIT |
 924                  VK_QUEUE_TRANSFER_BIT,
 925    .queueCount = 1,
 926    .timestampValidBits = 36, /* XXX: Real value here */
 927    .minImageTransferGranularity = { 1, 1, 1 },
 928 };
 929
 930 void anv_GetPhysicalDeviceQueueFamilyProperties(
 931     VkPhysicalDevice                            physicalDevice,
 932     uint32_t*                                   pCount,
 933     VkQueueFamilyProperties*                    pQueueFamilyProperties)
 934 {
 935    VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount);
 936
 937    vk_outarray_append(&out, p) {
 938       *p = anv_queue_family_properties;
 939    }
 940 }
 941
 942 void anv_GetPhysicalDeviceQueueFamilyProperties2KHR(
 943     VkPhysicalDevice                            physicalDevice,
 944     uint32_t*                                   pQueueFamilyPropertyCount,
 945     VkQueueFamilyProperties2KHR*                pQueueFamilyProperties)
 946 {
 947
 948    VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);
 949
 950    vk_outarray_append(&out, p) {
 951       p->queueFamilyProperties = anv_queue_family_properties;
 952
 953       vk_foreach_struct(s, p->pNext) {
 954          anv_debug_ignored_stype(s->sType);
 955       }
 956    }
 957 }
 958
 959 void anv_GetPhysicalDeviceMemoryProperties(
 960     VkPhysicalDevice                            physicalDevice,
 961     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
 962 {
 963    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
 964
 965    pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
 966    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
 967       pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
 968          .propertyFlags = physical_device->memory.types[i].propertyFlags,
 969          .heapIndex     = physical_device->memory.types[i].heapIndex,
 970       };
 971    }
 972
 973    pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
 974    for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
 975       pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
 976          .size    = physical_device->memory.heaps[i].size,
 977          .flags   = physical_device->memory.heaps[i].flags,
 978       };
 979    }
 980 }
 981
 982 void anv_GetPhysicalDeviceMemoryProperties2KHR(
 983     VkPhysicalDevice                            physicalDevice,
 984     VkPhysicalDeviceMemoryProperties2KHR*       pMemoryProperties)
 985 {
 986    anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
 987                                          &pMemoryProperties->memoryProperties);
 988
 989    vk_foreach_struct(ext, pMemoryProperties->pNext) {
 990       switch (ext->sType) {
 991       default:
 992          anv_debug_ignored_stype(ext->sType);
 993          break;
 994       }
 995    }
 996 }
 997
 998 PFN_vkVoidFunction anv_GetInstanceProcAddr(
 999     VkInstance                                  instance,
1000     const char*                                 pName)
1001 {
1002    return anv_lookup_entrypoint(NULL, pName);
1003 }
1004
1005 /* With version 1+ of the loader interface the ICD should expose
1006  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
1007  */
1008 PUBLIC
1009 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1010     VkInstance                                  instance,
1011     const char*                                 pName);
1012
1013 PUBLIC
1014 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1015     VkInstance                                  instance,
1016     const char*                                 pName)
1017 {
1018    return anv_GetInstanceProcAddr(instance, pName);
1019 }
1020
1021 PFN_vkVoidFunction anv_GetDeviceProcAddr(
1022     VkDevice                                    _device,
1023     const char*                                 pName)
1024 {
1025    ANV_FROM_HANDLE(anv_device, device, _device);
1026    return anv_lookup_entrypoint(&device->info, pName);
1027 }
1028
1029 static void
1030 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
1031 {
1032    queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1033    queue->device = device;
1034    queue->pool = &device->surface_state_pool;
1035 }
1036
/* Counterpart of anv_queue_init().  Nothing set up there needs to be
 * released, so this is intentionally a no-op.
 */
static void
anv_queue_finish(struct anv_queue *queue)
{
   /* Nothing to do. */
}
1041
1042 static struct anv_state
1043 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
1044 {
1045    struct anv_state state;
1046
1047    state = anv_state_pool_alloc(pool, size, align);
1048    memcpy(state.map, p, size);
1049
1050    anv_state_flush(pool->block_pool.device, state);
1051
1052    return state;
1053 }
1054
/* One border color entry: four channels, viewed either as floats or as
 * unsigned integers, followed by padding so each entry is 64 bytes.
 */
struct gen8_border_color {
   union {
      float float32[4];
      uint32_t uint32[4];
   };
   /* Pad out to 64 bytes: (64 - 16) / 4 == 12 dwords */
   uint32_t _pad[(64 - 4 * sizeof(uint32_t)) / sizeof(uint32_t)];
};
1063
1064 static void
1065 anv_device_init_border_colors(struct anv_device *device)
1066 {
1067    static const struct gen8_border_color border_colors[] = {
1068       [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
1069       [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
1070       [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
1071       [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
1072       [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
1073       [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
1074    };
1075
1076    device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
1077                                                     sizeof(border_colors), 64,
1078                                                     border_colors);
1079 }
1080
1081 static void
1082 anv_device_init_trivial_batch(struct anv_device *device)
1083 {
1084    anv_bo_init_new(&device->trivial_batch_bo, device, 4096);
1085
1086    if (device->instance->physicalDevice.has_exec_async)
1087       device->trivial_batch_bo.flags |= EXEC_OBJECT_ASYNC;
1088
1089    void *map = anv_gem_mmap(device, device->trivial_batch_bo.gem_handle,
1090                             0, 4096, 0);
1091
1092    struct anv_batch batch = {
1093       .start = map,
1094       .next = map,
1095       .end = map + 4096,
1096    };
1097
1098    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1099    anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1100
1101    if (!device->info.has_llc)
1102       gen_clflush_range(map, batch.next - map);
1103
1104    anv_gem_munmap(map, device->trivial_batch_bo.size);
1105 }
1106
1107 VkResult anv_CreateDevice(
1108     VkPhysicalDevice                            physicalDevice,
1109     const VkDeviceCreateInfo*                   pCreateInfo,
1110     const VkAllocationCallbacks*                pAllocator,
1111     VkDevice*                                   pDevice)
1112 {
1113    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
1114    VkResult result;
1115    struct anv_device *device;
1116
1117    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
1118
1119    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1120       const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1121       if (!anv_physical_device_extension_supported(physical_device, ext_name))
1122          return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1123    }
1124
1125    /* Check enabled features */
1126    if (pCreateInfo->pEnabledFeatures) {
1127       VkPhysicalDeviceFeatures supported_features;
1128       anv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1129       VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1130       VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1131       unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1132       for (uint32_t i = 0; i < num_features; i++) {
1133          if (enabled_feature[i] && !supported_feature[i])
1134             return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1135       }
1136    }
1137
1138    device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
1139                        sizeof(*device), 8,
1140                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1141    if (!device)
1142       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1143
1144    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1145    device->instance = physical_device->instance;
1146    device->chipset_id = physical_device->chipset_id;
1147    device->lost = false;
1148
1149    if (pAllocator)
1150       device->alloc = *pAllocator;
1151    else
1152       device->alloc = physical_device->instance->alloc;
1153
1154    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
1155    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
1156    if (device->fd == -1) {
1157       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1158       goto fail_device;
1159    }
1160
1161    device->context_id = anv_gem_create_context(device);
1162    if (device->context_id == -1) {
1163       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1164       goto fail_fd;
1165    }
1166
1167    device->info = physical_device->info;
1168    device->isl_dev = physical_device->isl_dev;
1169
1170    /* On Broadwell and later, we can use batch chaining to more efficiently
1171     * implement growing command buffers.  Prior to Haswell, the kernel
1172     * command parser gets in the way and we have to fall back to growing
1173     * the batch.
1174     */
1175    device->can_chain_batches = device->info.gen >= 8;
1176
1177    device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
1178       pCreateInfo->pEnabledFeatures->robustBufferAccess;
1179
1180    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
1181       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1182       goto fail_context_id;
1183    }
1184
1185    pthread_condattr_t condattr;
1186    if (pthread_condattr_init(&condattr) != 0) {
1187       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1188       goto fail_mutex;
1189    }
1190    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
1191       pthread_condattr_destroy(&condattr);
1192       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1193       goto fail_mutex;
1194    }
1195    if (pthread_cond_init(&device->queue_submit, NULL) != 0) {
1196       pthread_condattr_destroy(&condattr);
1197       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1198       goto fail_mutex;
1199    }
1200    pthread_condattr_destroy(&condattr);
1201
1202    anv_bo_pool_init(&device->batch_bo_pool, device);
1203
1204    result = anv_bo_cache_init(&device->bo_cache);
1205    if (result != VK_SUCCESS)
1206       goto fail_batch_bo_pool;
1207
1208    result = anv_state_pool_init(&device->dynamic_state_pool, device, 16384);
1209    if (result != VK_SUCCESS)
1210       goto fail_bo_cache;
1211
1212    result = anv_state_pool_init(&device->instruction_state_pool, device, 16384);
1213    if (result != VK_SUCCESS)
1214       goto fail_dynamic_state_pool;
1215
1216    result = anv_state_pool_init(&device->surface_state_pool, device, 4096);
1217    if (result != VK_SUCCESS)
1218       goto fail_instruction_state_pool;
1219
1220    result = anv_bo_init_new(&device->workaround_bo, device, 1024);
1221    if (result != VK_SUCCESS)
1222       goto fail_surface_state_pool;
1223
1224    anv_device_init_trivial_batch(device);
1225
1226    anv_scratch_pool_init(device, &device->scratch_pool);
1227
1228    anv_queue_init(device, &device->queue);
1229
1230    switch (device->info.gen) {
1231    case 7:
1232       if (!device->info.is_haswell)
1233          result = gen7_init_device_state(device);
1234       else
1235          result = gen75_init_device_state(device);
1236       break;
1237    case 8:
1238       result = gen8_init_device_state(device);
1239       break;
1240    case 9:
1241       result = gen9_init_device_state(device);
1242       break;
1243    case 10:
1244       result = gen10_init_device_state(device);
1245       break;
1246    default:
1247       /* Shouldn't get here as we don't create physical devices for any other
1248        * gens. */
1249       unreachable("unhandled gen");
1250    }
1251    if (result != VK_SUCCESS)
1252       goto fail_workaround_bo;
1253
1254    anv_device_init_blorp(device);
1255
1256    anv_device_init_border_colors(device);
1257
1258    *pDevice = anv_device_to_handle(device);
1259
1260    return VK_SUCCESS;
1261
1262  fail_workaround_bo:
1263    anv_queue_finish(&device->queue);
1264    anv_scratch_pool_finish(device, &device->scratch_pool);
1265    anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
1266    anv_gem_close(device, device->workaround_bo.gem_handle);
1267  fail_surface_state_pool:
1268    anv_state_pool_finish(&device->surface_state_pool);
1269  fail_instruction_state_pool:
1270    anv_state_pool_finish(&device->instruction_state_pool);
1271  fail_dynamic_state_pool:
1272    anv_state_pool_finish(&device->dynamic_state_pool);
1273  fail_bo_cache:
1274    anv_bo_cache_finish(&device->bo_cache);
1275  fail_batch_bo_pool:
1276    anv_bo_pool_finish(&device->batch_bo_pool);
1277    pthread_cond_destroy(&device->queue_submit);
1278  fail_mutex:
1279    pthread_mutex_destroy(&device->mutex);
1280  fail_context_id:
1281    anv_gem_destroy_context(device, device->context_id);
1282  fail_fd:
1283    close(device->fd);
1284  fail_device:
1285    vk_free(&device->alloc, device);
1286
1287    return result;
1288 }
1289
1290 void anv_DestroyDevice(
1291     VkDevice                                    _device,
1292     const VkAllocationCallbacks*                pAllocator)
1293 {
1294    ANV_FROM_HANDLE(anv_device, device, _device);
1295
1296    if (!device)
1297       return;
1298
1299    anv_device_finish_blorp(device);
1300
1301    anv_queue_finish(&device->queue);
1302
1303 #ifdef HAVE_VALGRIND
1304    /* We only need to free these to prevent valgrind errors.  The backing
1305     * BO will go away in a couple of lines so we don't actually leak.
1306     */
1307    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
1308 #endif
1309
1310    anv_scratch_pool_finish(device, &device->scratch_pool);
1311
1312    anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
1313    anv_gem_close(device, device->workaround_bo.gem_handle);
1314
1315    anv_gem_close(device, device->trivial_batch_bo.gem_handle);
1316
1317    anv_state_pool_finish(&device->surface_state_pool);
1318    anv_state_pool_finish(&device->instruction_state_pool);
1319    anv_state_pool_finish(&device->dynamic_state_pool);
1320
1321    anv_bo_cache_finish(&device->bo_cache);
1322
1323    anv_bo_pool_finish(&device->batch_bo_pool);
1324
1325    pthread_cond_destroy(&device->queue_submit);
1326    pthread_mutex_destroy(&device->mutex);
1327
1328    anv_gem_destroy_context(device, device->context_id);
1329
1330    close(device->fd);
1331
1332    vk_free(&device->alloc, device);
1333 }
1334
1335 VkResult anv_EnumerateInstanceLayerProperties(
1336     uint32_t*                                   pPropertyCount,
1337     VkLayerProperties*                          pProperties)
1338 {
1339    if (pProperties == NULL) {
1340       *pPropertyCount = 0;
1341       return VK_SUCCESS;
1342    }
1343
1344    /* None supported at this time */
1345    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1346 }
1347
1348 VkResult anv_EnumerateDeviceLayerProperties(
1349     VkPhysicalDevice                            physicalDevice,
1350     uint32_t*                                   pPropertyCount,
1351     VkLayerProperties*                          pProperties)
1352 {
1353    if (pProperties == NULL) {
1354       *pPropertyCount = 0;
1355       return VK_SUCCESS;
1356    }
1357
1358    /* None supported at this time */
1359    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1360 }
1361
1362 void anv_GetDeviceQueue(
1363     VkDevice                                    _device,
1364     uint32_t                                    queueNodeIndex,
1365     uint32_t                                    queueIndex,
1366     VkQueue*                                    pQueue)
1367 {
1368    ANV_FROM_HANDLE(anv_device, device, _device);
1369
1370    assert(queueIndex == 0);
1371
1372    *pQueue = anv_queue_to_handle(&device->queue);
1373 }
1374
1375 VkResult
1376 anv_device_query_status(struct anv_device *device)
1377 {
1378    /* This isn't likely as most of the callers of this function already check
1379     * for it.  However, it doesn't hurt to check and it potentially lets us
1380     * avoid an ioctl.
1381     */
1382    if (unlikely(device->lost))
1383       return VK_ERROR_DEVICE_LOST;
1384
1385    uint32_t active, pending;
1386    int ret = anv_gem_gpu_get_reset_stats(device, &active, &pending);
1387    if (ret == -1) {
1388       /* We don't know the real error. */
1389       device->lost = true;
1390       return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1391                        "get_reset_stats failed: %m");
1392    }
1393
1394    if (active) {
1395       device->lost = true;
1396       return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1397                        "GPU hung on one of our command buffers");
1398    } else if (pending) {
1399       device->lost = true;
1400       return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1401                        "GPU hung with commands in-flight");
1402    }
1403
1404    return VK_SUCCESS;
1405 }
1406
1407 VkResult
1408 anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo)
1409 {
1410    /* Note:  This only returns whether or not the BO is in use by an i915 GPU.
1411     * Other usages of the BO (such as on different hardware) will not be
1412     * flagged as "busy" by this ioctl.  Use with care.
1413     */
1414    int ret = anv_gem_busy(device, bo->gem_handle);
1415    if (ret == 1) {
1416       return VK_NOT_READY;
1417    } else if (ret == -1) {
1418       /* We don't know the real error. */
1419       device->lost = true;
1420       return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1421                        "gem wait failed: %m");
1422    }
1423
1424    /* Query for device status after the busy call.  If the BO we're checking
1425     * got caught in a GPU hang we don't want to return VK_SUCCESS to the
1426     * client because it clearly doesn't have valid data.  Yes, this most
1427     * likely means an ioctl, but we just did an ioctl to query the busy status
1428     * so it's no great loss.
1429     */
1430    return anv_device_query_status(device);
1431 }
1432
1433 VkResult
1434 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1435                 int64_t timeout)
1436 {
1437    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
1438    if (ret == -1 && errno == ETIME) {
1439       return VK_TIMEOUT;
1440    } else if (ret == -1) {
1441       /* We don't know the real error. */
1442       device->lost = true;
1443       return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1444                        "gem wait failed: %m");
1445    }
1446
1447    /* Query for device status after the wait.  If the BO we're waiting on got
1448     * caught in a GPU hang we don't want to return VK_SUCCESS to the client
1449     * because it clearly doesn't have valid data.  Yes, this most likely means
1450     * an ioctl, but we just did an ioctl to wait so it's no great loss.
1451     */
1452    return anv_device_query_status(device);
1453 }
1454
1455 VkResult anv_DeviceWaitIdle(
1456     VkDevice                                    _device)
1457 {
1458    ANV_FROM_HANDLE(anv_device, device, _device);
1459    if (unlikely(device->lost))
1460       return VK_ERROR_DEVICE_LOST;
1461
1462    struct anv_batch batch;
1463
1464    uint32_t cmds[8];
1465    batch.start = batch.next = cmds;
1466    batch.end = (void *) cmds + sizeof(cmds);
1467
1468    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1469    anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1470
1471    return anv_device_submit_simple_batch(device, &batch);
1472 }
1473
1474 VkResult
1475 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1476 {
1477    uint32_t gem_handle = anv_gem_create(device, size);
1478    if (!gem_handle)
1479       return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1480
1481    anv_bo_init(bo, gem_handle, size);
1482
1483    return VK_SUCCESS;
1484 }
1485
1486 VkResult anv_AllocateMemory(
1487     VkDevice                                    _device,
1488     const VkMemoryAllocateInfo*                 pAllocateInfo,
1489     const VkAllocationCallbacks*                pAllocator,
1490     VkDeviceMemory*                             pMem)
1491 {
1492    ANV_FROM_HANDLE(anv_device, device, _device);
1493    struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1494    struct anv_device_memory *mem;
1495    VkResult result = VK_SUCCESS;
1496
1497    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1498
1499    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
1500    assert(pAllocateInfo->allocationSize > 0);
1501
1502    /* The kernel relocation API has a limitation of a 32-bit delta value
1503     * applied to the address before it is written which, in spite of it being
1504     * unsigned, is treated as signed .  Because of the way that this maps to
1505     * the Vulkan API, we cannot handle an offset into a buffer that does not
1506     * fit into a signed 32 bits.  The only mechanism we have for dealing with
1507     * this at the moment is to limit all VkDeviceMemory objects to a maximum
1508     * of 2GB each.  The Vulkan spec allows us to do this:
1509     *
1510     *    "Some platforms may have a limit on the maximum size of a single
1511     *    allocation. For example, certain systems may fail to create
1512     *    allocations with a size greater than or equal to 4GB. Such a limit is
1513     *    implementation-dependent, and if such a failure occurs then the error
1514     *    VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
1515     *
1516     * We don't use vk_error here because it's not an error so much as an
1517     * indication to the application that the allocation is too large.
1518     */
1519    if (pAllocateInfo->allocationSize > (1ull << 31))
1520       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1521
1522    /* FINISHME: Fail if allocation request exceeds heap size. */
1523
1524    mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1525                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1526    if (mem == NULL)
1527       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1528
1529    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
1530    mem->type = &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
1531    mem->map = NULL;
1532    mem->map_size = 0;
1533
1534    const VkImportMemoryFdInfoKHR *fd_info =
1535       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
1536
1537    /* The Vulkan spec permits handleType to be 0, in which case the struct is
1538     * ignored.
1539     */
1540    if (fd_info && fd_info->handleType) {
1541       /* At the moment, we only support the OPAQUE_FD memory type which is
1542        * just a GEM buffer.
1543        */
1544       assert(fd_info->handleType ==
1545              VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
1546
1547       result = anv_bo_cache_import(device, &device->bo_cache,
1548                                    fd_info->fd, &mem->bo);
1549       if (result != VK_SUCCESS)
1550          goto fail;
1551
1552       VkDeviceSize aligned_alloc_size =
1553          align_u64(pAllocateInfo->allocationSize, 4096);
1554
1555       /* For security purposes, we reject importing the bo if it's smaller
1556        * than the requested allocation size.  This prevents a malicious client
1557        * from passing a buffer to a trusted client, lying about the size, and
1558        * telling the trusted client to try and texture from an image that goes
1559        * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
1560        * in the trusted client.  The trusted client can protect itself against
1561        * this sort of attack but only if it can trust the buffer size.
1562        */
1563       if (mem->bo->size < aligned_alloc_size) {
1564          result = vk_errorf(device->instance, device,
1565                             VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
1566                             "aligned allocationSize too large for "
1567                             "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: "
1568                             "%"PRIu64"B > %"PRIu64"B",
1569                             aligned_alloc_size, mem->bo->size);
1570          anv_bo_cache_release(device, &device->bo_cache, mem->bo);
1571          goto fail;
1572       }
1573
1574       /* From the Vulkan spec:
1575        *
1576        *    "Importing memory from a file descriptor transfers ownership of
1577        *    the file descriptor from the application to the Vulkan
1578        *    implementation. The application must not perform any operations on
1579        *    the file descriptor after a successful import."
1580        *
1581        * If the import fails, we leave the file descriptor open.
1582        */
1583       close(fd_info->fd);
1584    } else {
1585       result = anv_bo_cache_alloc(device, &device->bo_cache,
1586                                   pAllocateInfo->allocationSize,
1587                                   &mem->bo);
1588       if (result != VK_SUCCESS)
1589          goto fail;
1590    }
1591
1592    assert(mem->type->heapIndex < pdevice->memory.heap_count);
1593    if (pdevice->memory.heaps[mem->type->heapIndex].supports_48bit_addresses)
1594       mem->bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
1595
1596    if (pdevice->has_exec_async)
1597       mem->bo->flags |= EXEC_OBJECT_ASYNC;
1598
1599    *pMem = anv_device_memory_to_handle(mem);
1600
1601    return VK_SUCCESS;
1602
1603  fail:
1604    vk_free2(&device->alloc, pAllocator, mem);
1605
1606    return result;
1607 }
1608
1609 VkResult anv_GetMemoryFdKHR(
1610     VkDevice                                    device_h,
1611     const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
1612     int*                                        pFd)
1613 {
1614    ANV_FROM_HANDLE(anv_device, dev, device_h);
1615    ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
1616
1617    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
1618
1619    /* We support only one handle type. */
1620    assert(pGetFdInfo->handleType ==
1621           VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
1622
1623    return anv_bo_cache_export(dev, &dev->bo_cache, mem->bo, pFd);
1624 }
1625
1626 VkResult anv_GetMemoryFdPropertiesKHR(
1627     VkDevice                                    device_h,
1628     VkExternalMemoryHandleTypeFlagBitsKHR       handleType,
1629     int                                         fd,
1630     VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
1631 {
1632    /* The valid usage section for this function says:
1633     *
1634     *    "handleType must not be one of the handle types defined as opaque."
1635     *
1636     * Since we only handle opaque handles for now, there are no FD properties.
1637     */
1638    return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
1639 }
1640
1641 void anv_FreeMemory(
1642     VkDevice                                    _device,
1643     VkDeviceMemory                              _mem,
1644     const VkAllocationCallbacks*                pAllocator)
1645 {
1646    ANV_FROM_HANDLE(anv_device, device, _device);
1647    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1648
1649    if (mem == NULL)
1650       return;
1651
1652    if (mem->map)
1653       anv_UnmapMemory(_device, _mem);
1654
1655    anv_bo_cache_release(device, &device->bo_cache, mem->bo);
1656
1657    vk_free2(&device->alloc, pAllocator, mem);
1658 }
1659
1660 VkResult anv_MapMemory(
1661     VkDevice                                    _device,
1662     VkDeviceMemory                              _memory,
1663     VkDeviceSize                                offset,
1664     VkDeviceSize                                size,
1665     VkMemoryMapFlags                            flags,
1666     void**                                      ppData)
1667 {
1668    ANV_FROM_HANDLE(anv_device, device, _device);
1669    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1670
1671    if (mem == NULL) {
1672       *ppData = NULL;
1673       return VK_SUCCESS;
1674    }
1675
1676    if (size == VK_WHOLE_SIZE)
1677       size = mem->bo->size - offset;
1678
1679    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
1680     *
1681     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
1682     *    assert(size != 0);
1683     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
1684     *    equal to the size of the memory minus offset
1685     */
1686    assert(size > 0);
1687    assert(offset + size <= mem->bo->size);
1688
1689    /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1690     * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1691     * at a time is valid. We could just mmap up front and return an offset
1692     * pointer here, but that may exhaust virtual memory on 32 bit
1693     * userspace. */
1694
1695    uint32_t gem_flags = 0;
1696
1697    if (!device->info.has_llc &&
1698        (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
1699       gem_flags |= I915_MMAP_WC;
1700
1701    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
1702    uint64_t map_offset = offset & ~4095ull;
1703    assert(offset >= map_offset);
1704    uint64_t map_size = (offset + size) - map_offset;
1705
1706    /* Let's map whole pages */
1707    map_size = align_u64(map_size, 4096);
1708
1709    void *map = anv_gem_mmap(device, mem->bo->gem_handle,
1710                             map_offset, map_size, gem_flags);
1711    if (map == MAP_FAILED)
1712       return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
1713
1714    mem->map = map;
1715    mem->map_size = map_size;
1716
1717    *ppData = mem->map + (offset - map_offset);
1718
1719    return VK_SUCCESS;
1720 }
1721
1722 void anv_UnmapMemory(
1723     VkDevice                                    _device,
1724     VkDeviceMemory                              _memory)
1725 {
1726    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1727
1728    if (mem == NULL)
1729       return;
1730
1731    anv_gem_munmap(mem->map, mem->map_size);
1732
1733    mem->map = NULL;
1734    mem->map_size = 0;
1735 }
1736
1737 static void
1738 clflush_mapped_ranges(struct anv_device         *device,
1739                       uint32_t                   count,
1740                       const VkMappedMemoryRange *ranges)
1741 {
1742    for (uint32_t i = 0; i < count; i++) {
1743       ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
1744       if (ranges[i].offset >= mem->map_size)
1745          continue;
1746
1747       gen_clflush_range(mem->map + ranges[i].offset,
1748                         MIN2(ranges[i].size, mem->map_size - ranges[i].offset));
1749    }
1750 }
1751
1752 VkResult anv_FlushMappedMemoryRanges(
1753     VkDevice                                    _device,
1754     uint32_t                                    memoryRangeCount,
1755     const VkMappedMemoryRange*                  pMemoryRanges)
1756 {
1757    ANV_FROM_HANDLE(anv_device, device, _device);
1758
1759    if (device->info.has_llc)
1760       return VK_SUCCESS;
1761
1762    /* Make sure the writes we're flushing have landed. */
1763    __builtin_ia32_mfence();
1764
1765    clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1766
1767    return VK_SUCCESS;
1768 }
1769
1770 VkResult anv_InvalidateMappedMemoryRanges(
1771     VkDevice                                    _device,
1772     uint32_t                                    memoryRangeCount,
1773     const VkMappedMemoryRange*                  pMemoryRanges)
1774 {
1775    ANV_FROM_HANDLE(anv_device, device, _device);
1776
1777    if (device->info.has_llc)
1778       return VK_SUCCESS;
1779
1780    clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1781
1782    /* Make sure no reads get moved up above the invalidate. */
1783    __builtin_ia32_mfence();
1784
1785    return VK_SUCCESS;
1786 }
1787
1788 void anv_GetBufferMemoryRequirements(
1789     VkDevice                                    _device,
1790     VkBuffer                                    _buffer,
1791     VkMemoryRequirements*                       pMemoryRequirements)
1792 {
1793    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1794    ANV_FROM_HANDLE(anv_device, device, _device);
1795    struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1796
1797    /* The Vulkan spec (git aaed022) says:
1798     *
1799     *    memoryTypeBits is a bitfield and contains one bit set for every
1800     *    supported memory type for the resource. The bit `1<<i` is set if and
1801     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1802     *    structure for the physical device is supported.
1803     */
1804    uint32_t memory_types = 0;
1805    for (uint32_t i = 0; i < pdevice->memory.type_count; i++) {
1806       uint32_t valid_usage = pdevice->memory.types[i].valid_buffer_usage;
1807       if ((valid_usage & buffer->usage) == buffer->usage)
1808          memory_types |= (1u << i);
1809    }
1810
1811    pMemoryRequirements->size = buffer->size;
1812    pMemoryRequirements->alignment = 16;
1813    pMemoryRequirements->memoryTypeBits = memory_types;
1814 }
1815
1816 void anv_GetBufferMemoryRequirements2KHR(
1817     VkDevice                                    _device,
1818     const VkBufferMemoryRequirementsInfo2KHR*   pInfo,
1819     VkMemoryRequirements2KHR*                   pMemoryRequirements)
1820 {
1821    anv_GetBufferMemoryRequirements(_device, pInfo->buffer,
1822                                    &pMemoryRequirements->memoryRequirements);
1823
1824    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
1825       switch (ext->sType) {
1826       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
1827          VkMemoryDedicatedRequirementsKHR *requirements = (void *)ext;
1828          requirements->prefersDedicatedAllocation = VK_FALSE;
1829          requirements->requiresDedicatedAllocation = VK_FALSE;
1830          break;
1831       }
1832
1833       default:
1834          anv_debug_ignored_stype(ext->sType);
1835          break;
1836       }
1837    }
1838 }
1839
1840 void anv_GetImageMemoryRequirements(
1841     VkDevice                                    _device,
1842     VkImage                                     _image,
1843     VkMemoryRequirements*                       pMemoryRequirements)
1844 {
1845    ANV_FROM_HANDLE(anv_image, image, _image);
1846    ANV_FROM_HANDLE(anv_device, device, _device);
1847    struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1848
1849    /* The Vulkan spec (git aaed022) says:
1850     *
1851     *    memoryTypeBits is a bitfield and contains one bit set for every
1852     *    supported memory type for the resource. The bit `1<<i` is set if and
1853     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1854     *    structure for the physical device is supported.
1855     *
1856     * All types are currently supported for images.
1857     */
1858    uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1;
1859
1860    pMemoryRequirements->size = image->size;
1861    pMemoryRequirements->alignment = image->alignment;
1862    pMemoryRequirements->memoryTypeBits = memory_types;
1863 }
1864
1865 void anv_GetImageMemoryRequirements2KHR(
1866     VkDevice                                    _device,
1867     const VkImageMemoryRequirementsInfo2KHR*    pInfo,
1868     VkMemoryRequirements2KHR*                   pMemoryRequirements)
1869 {
1870    anv_GetImageMemoryRequirements(_device, pInfo->image,
1871                                   &pMemoryRequirements->memoryRequirements);
1872
1873    vk_foreach_struct_const(ext, pInfo->pNext) {
1874       switch (ext->sType) {
1875       case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO_KHR: {
1876          ANV_FROM_HANDLE(anv_image, image, pInfo->image);
1877          ANV_FROM_HANDLE(anv_device, device, _device);
1878          struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1879          const VkImagePlaneMemoryRequirementsInfoKHR *plane_reqs =
1880             (const VkImagePlaneMemoryRequirementsInfoKHR *) ext;
1881          uint32_t plane = anv_image_aspect_to_plane(image->aspects,
1882                                                     plane_reqs->planeAspect);
1883
1884          assert(image->planes[plane].offset == 0);
1885
1886          /* The Vulkan spec (git aaed022) says:
1887           *
1888           *    memoryTypeBits is a bitfield and contains one bit set for every
1889           *    supported memory type for the resource. The bit `1<<i` is set
1890           *    if and only if the memory type `i` in the
1891           *    VkPhysicalDeviceMemoryProperties structure for the physical
1892           *    device is supported.
1893           *
1894           * All types are currently supported for images.
1895           */
1896          pMemoryRequirements->memoryRequirements.memoryTypeBits =
1897                (1ull << pdevice->memory.type_count) - 1;
1898
1899          pMemoryRequirements->memoryRequirements.size = image->planes[plane].size;
1900          pMemoryRequirements->memoryRequirements.alignment =
1901             image->planes[plane].alignment;
1902          break;
1903       }
1904
1905       default:
1906          anv_debug_ignored_stype(ext->sType);
1907          break;
1908       }
1909    }
1910
1911    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
1912       switch (ext->sType) {
1913       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
1914          VkMemoryDedicatedRequirementsKHR *requirements = (void *)ext;
1915          requirements->prefersDedicatedAllocation = VK_FALSE;
1916          requirements->requiresDedicatedAllocation = VK_FALSE;
1917          break;
1918       }
1919
1920       default:
1921          anv_debug_ignored_stype(ext->sType);
1922          break;
1923       }
1924    }
1925 }
1926
1927 void anv_GetImageSparseMemoryRequirements(
1928     VkDevice                                    device,
1929     VkImage                                     image,
1930     uint32_t*                                   pSparseMemoryRequirementCount,
1931     VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
1932 {
1933    *pSparseMemoryRequirementCount = 0;
1934 }
1935
1936 void anv_GetImageSparseMemoryRequirements2KHR(
1937     VkDevice                                    device,
1938     const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
1939     uint32_t*                                   pSparseMemoryRequirementCount,
1940     VkSparseImageMemoryRequirements2KHR*        pSparseMemoryRequirements)
1941 {
1942    *pSparseMemoryRequirementCount = 0;
1943 }
1944
1945 void anv_GetDeviceMemoryCommitment(
1946     VkDevice                                    device,
1947     VkDeviceMemory                              memory,
1948     VkDeviceSize*                               pCommittedMemoryInBytes)
1949 {
1950    *pCommittedMemoryInBytes = 0;
1951 }
1952
1953 static void
1954 anv_bind_buffer_memory(const VkBindBufferMemoryInfoKHR *pBindInfo)
1955 {
1956    ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
1957    ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
1958
1959    assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR);
1960
1961    if (mem) {
1962       assert((buffer->usage & mem->type->valid_buffer_usage) == buffer->usage);
1963       buffer->bo = mem->bo;
1964       buffer->offset = pBindInfo->memoryOffset;
1965    } else {
1966       buffer->bo = NULL;
1967       buffer->offset = 0;
1968    }
1969 }
1970
1971 VkResult anv_BindBufferMemory(
1972     VkDevice                                    device,
1973     VkBuffer                                    buffer,
1974     VkDeviceMemory                              memory,
1975     VkDeviceSize                                memoryOffset)
1976 {
1977    anv_bind_buffer_memory(
1978       &(VkBindBufferMemoryInfoKHR) {
1979          .sType         = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
1980          .buffer        = buffer,
1981          .memory        = memory,
1982          .memoryOffset  = memoryOffset,
1983       });
1984
1985    return VK_SUCCESS;
1986 }
1987
1988 VkResult anv_BindBufferMemory2KHR(
1989     VkDevice                                    device,
1990     uint32_t                                    bindInfoCount,
1991     const VkBindBufferMemoryInfoKHR*            pBindInfos)
1992 {
1993    for (uint32_t i = 0; i < bindInfoCount; i++)
1994       anv_bind_buffer_memory(&pBindInfos[i]);
1995
1996    return VK_SUCCESS;
1997 }
1998
1999 VkResult anv_QueueBindSparse(
2000     VkQueue                                     _queue,
2001     uint32_t                                    bindInfoCount,
2002     const VkBindSparseInfo*                     pBindInfo,
2003     VkFence                                     fence)
2004 {
2005    ANV_FROM_HANDLE(anv_queue, queue, _queue);
2006    if (unlikely(queue->device->lost))
2007       return VK_ERROR_DEVICE_LOST;
2008
2009    return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
2010 }
2011
2012 // Event functions
2013
2014 VkResult anv_CreateEvent(
2015     VkDevice                                    _device,
2016     const VkEventCreateInfo*                    pCreateInfo,
2017     const VkAllocationCallbacks*                pAllocator,
2018     VkEvent*                                    pEvent)
2019 {
2020    ANV_FROM_HANDLE(anv_device, device, _device);
2021    struct anv_state state;
2022    struct anv_event *event;
2023
2024    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
2025
2026    state = anv_state_pool_alloc(&device->dynamic_state_pool,
2027                                 sizeof(*event), 8);
2028    event = state.map;
2029    event->state = state;
2030    event->semaphore = VK_EVENT_RESET;
2031
2032    if (!device->info.has_llc) {
2033       /* Make sure the writes we're flushing have landed. */
2034       __builtin_ia32_mfence();
2035       __builtin_ia32_clflush(event);
2036    }
2037
2038    *pEvent = anv_event_to_handle(event);
2039
2040    return VK_SUCCESS;
2041 }
2042
2043 void anv_DestroyEvent(
2044     VkDevice                                    _device,
2045     VkEvent                                     _event,
2046     const VkAllocationCallbacks*                pAllocator)
2047 {
2048    ANV_FROM_HANDLE(anv_device, device, _device);
2049    ANV_FROM_HANDLE(anv_event, event, _event);
2050
2051    if (!event)
2052       return;
2053
2054    anv_state_pool_free(&device->dynamic_state_pool, event->state);
2055 }
2056
2057 VkResult anv_GetEventStatus(
2058     VkDevice                                    _device,
2059     VkEvent                                     _event)
2060 {
2061    ANV_FROM_HANDLE(anv_device, device, _device);
2062    ANV_FROM_HANDLE(anv_event, event, _event);
2063
2064    if (unlikely(device->lost))
2065       return VK_ERROR_DEVICE_LOST;
2066
2067    if (!device->info.has_llc) {
2068       /* Invalidate read cache before reading event written by GPU. */
2069       __builtin_ia32_clflush(event);
2070       __builtin_ia32_mfence();
2071
2072    }
2073
2074    return event->semaphore;
2075 }
2076
2077 VkResult anv_SetEvent(
2078     VkDevice                                    _device,
2079     VkEvent                                     _event)
2080 {
2081    ANV_FROM_HANDLE(anv_device, device, _device);
2082    ANV_FROM_HANDLE(anv_event, event, _event);
2083
2084    event->semaphore = VK_EVENT_SET;
2085
2086    if (!device->info.has_llc) {
2087       /* Make sure the writes we're flushing have landed. */
2088       __builtin_ia32_mfence();
2089       __builtin_ia32_clflush(event);
2090    }
2091
2092    return VK_SUCCESS;
2093 }
2094
2095 VkResult anv_ResetEvent(
2096     VkDevice                                    _device,
2097     VkEvent                                     _event)
2098 {
2099    ANV_FROM_HANDLE(anv_device, device, _device);
2100    ANV_FROM_HANDLE(anv_event, event, _event);
2101
2102    event->semaphore = VK_EVENT_RESET;
2103
2104    if (!device->info.has_llc) {
2105       /* Make sure the writes we're flushing have landed. */
2106       __builtin_ia32_mfence();
2107       __builtin_ia32_clflush(event);
2108    }
2109
2110    return VK_SUCCESS;
2111 }
2112
2113 // Buffer functions
2114
2115 VkResult anv_CreateBuffer(
2116     VkDevice                                    _device,
2117     const VkBufferCreateInfo*                   pCreateInfo,
2118     const VkAllocationCallbacks*                pAllocator,
2119     VkBuffer*                                   pBuffer)
2120 {
2121    ANV_FROM_HANDLE(anv_device, device, _device);
2122    struct anv_buffer *buffer;
2123
2124    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2125
2126    buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2127                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2128    if (buffer == NULL)
2129       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2130
2131    buffer->size = pCreateInfo->size;
2132    buffer->usage = pCreateInfo->usage;
2133    buffer->bo = NULL;
2134    buffer->offset = 0;
2135
2136    *pBuffer = anv_buffer_to_handle(buffer);
2137
2138    return VK_SUCCESS;
2139 }
2140
2141 void anv_DestroyBuffer(
2142     VkDevice                                    _device,
2143     VkBuffer                                    _buffer,
2144     const VkAllocationCallbacks*                pAllocator)
2145 {
2146    ANV_FROM_HANDLE(anv_device, device, _device);
2147    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
2148
2149    if (!buffer)
2150       return;
2151
2152    vk_free2(&device->alloc, pAllocator, buffer);
2153 }
2154
2155 void
2156 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
2157                               enum isl_format format,
2158                               uint32_t offset, uint32_t range, uint32_t stride)
2159 {
2160    isl_buffer_fill_state(&device->isl_dev, state.map,
2161                          .address = offset,
2162                          .mocs = device->default_mocs,
2163                          .size = range,
2164                          .format = format,
2165                          .stride = stride);
2166
2167    anv_state_flush(device, state);
2168 }
2169
2170 void anv_DestroySampler(
2171     VkDevice                                    _device,
2172     VkSampler                                   _sampler,
2173     const VkAllocationCallbacks*                pAllocator)
2174 {
2175    ANV_FROM_HANDLE(anv_device, device, _device);
2176    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
2177
2178    if (!sampler)
2179       return;
2180
2181    vk_free2(&device->alloc, pAllocator, sampler);
2182 }
2183
2184 VkResult anv_CreateFramebuffer(
2185     VkDevice                                    _device,
2186     const VkFramebufferCreateInfo*              pCreateInfo,
2187     const VkAllocationCallbacks*                pAllocator,
2188     VkFramebuffer*                              pFramebuffer)
2189 {
2190    ANV_FROM_HANDLE(anv_device, device, _device);
2191    struct anv_framebuffer *framebuffer;
2192
2193    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2194
2195    size_t size = sizeof(*framebuffer) +
2196                  sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
2197    framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2198                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2199    if (framebuffer == NULL)
2200       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2201
2202    framebuffer->attachment_count = pCreateInfo->attachmentCount;
2203    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2204       VkImageView _iview = pCreateInfo->pAttachments[i];
2205       framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
2206    }
2207
2208    framebuffer->width = pCreateInfo->width;
2209    framebuffer->height = pCreateInfo->height;
2210    framebuffer->layers = pCreateInfo->layers;
2211
2212    *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
2213
2214    return VK_SUCCESS;
2215 }
2216
2217 void anv_DestroyFramebuffer(
2218     VkDevice                                    _device,
2219     VkFramebuffer                               _fb,
2220     const VkAllocationCallbacks*                pAllocator)
2221 {
2222    ANV_FROM_HANDLE(anv_device, device, _device);
2223    ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
2224
2225    if (!fb)
2226       return;
2227
2228    vk_free2(&device->alloc, pAllocator, fb);
2229 }
2230
2231 /* vk_icd.h does not declare this function, so we declare it here to
2232  * suppress Wmissing-prototypes.
2233  */
2234 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2235 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
2236
2237 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2238 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
2239 {
2240    /* For the full details on loader interface versioning, see
2241     * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2242     * What follows is a condensed summary, to help you navigate the large and
2243     * confusing official doc.
2244     *
2245     *   - Loader interface v0 is incompatible with later versions. We don't
2246     *     support it.
2247     *
2248     *   - In loader interface v1:
2249     *       - The first ICD entrypoint called by the loader is
2250     *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2251     *         entrypoint.
2252     *       - The ICD must statically expose no other Vulkan symbol unless it is
2253     *         linked with -Bsymbolic.
2254     *       - Each dispatchable Vulkan handle created by the ICD must be
2255     *         a pointer to a struct whose first member is VK_LOADER_DATA. The
2256     *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2257     *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2258     *         vkDestroySurfaceKHR(). The ICD must be capable of working with
2259     *         such loader-managed surfaces.
2260     *
2261     *    - Loader interface v2 differs from v1 in:
2262     *       - The first ICD entrypoint called by the loader is
2263     *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2264     *         statically expose this entrypoint.
2265     *
2266     *    - Loader interface v3 differs from v2 in:
2267     *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2268     *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
2269     *          because the loader no longer does so.
2270     */
2271    *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2272    return VK_SUCCESS;
2273 }