vk/formats: Document new meaning of anv_format::cpp
[mesa.git] / src / vulkan / device.c
index 0a80a201d0306f3ef85f12ea8a5c0f94e74dd3fe..74771e300314acae87f6b85abe7088bcf5d3ce33 100644 (file)
@@ -114,7 +114,7 @@ static const VkAllocCallbacks default_alloc_callbacks = {
    .pfnFree = default_free
 };
 
-VkResult VKAPI vkCreateInstance(
+VkResult anv_CreateInstance(
     const VkInstanceCreateInfo*                 pCreateInfo,
     VkInstance*                                 pInstance)
 {
@@ -142,15 +142,17 @@ VkResult VKAPI vkCreateInstance(
    instance->physicalDeviceCount = 0;
    result = fill_physical_device(&instance->physicalDevice,
                                  instance, "/dev/dri/renderD128");
-   if (result == VK_SUCCESS)
-      instance->physicalDeviceCount++;
 
+   if (result != VK_SUCCESS)
+      return result;
+
+   instance->physicalDeviceCount++;
    *pInstance = (VkInstance) instance;
 
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkDestroyInstance(
+VkResult anv_DestroyInstance(
     VkInstance                                  _instance)
 {
    struct anv_instance *instance = (struct anv_instance *) _instance;
@@ -160,7 +162,7 @@ VkResult VKAPI vkDestroyInstance(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkEnumeratePhysicalDevices(
+VkResult anv_EnumeratePhysicalDevices(
     VkInstance                                  _instance,
     uint32_t*                                   pPhysicalDeviceCount,
     VkPhysicalDevice*                           pPhysicalDevices)
@@ -174,7 +176,7 @@ VkResult VKAPI vkEnumeratePhysicalDevices(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkGetPhysicalDeviceInfo(
+VkResult anv_GetPhysicalDeviceInfo(
     VkPhysicalDevice                            physicalDevice,
     VkPhysicalDeviceInfoType                    infoType,
     size_t*                                     pDataSize,
@@ -185,13 +187,16 @@ VkResult VKAPI vkGetPhysicalDeviceInfo(
    VkPhysicalDevicePerformance *performance;
    VkPhysicalDeviceQueueProperties *queue_properties;
    VkPhysicalDeviceMemoryProperties *memory_properties;
+   VkDisplayPropertiesWSI *display_properties;
    uint64_t ns_per_tick = 80;
    
-   switch (infoType) {
+   switch ((uint32_t) infoType) {
    case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES:
       properties = pData;
-      assert(*pDataSize >= sizeof(*properties));
-      *pDataSize = sizeof(*properties); /* Assuming we have to return the size of our struct. */
+
+      *pDataSize = sizeof(*properties);
+      if (pData == NULL)
+         return VK_SUCCESS;
 
       properties->apiVersion = 1;
       properties->driverVersion = 1;
@@ -200,19 +205,21 @@ VkResult VKAPI vkGetPhysicalDeviceInfo(
       properties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
       strcpy(properties->deviceName, device->name);
       properties->maxInlineMemoryUpdateSize = 0;
-      properties->maxBoundDescriptorSets = 0;
-      properties->maxThreadGroupSize = 0;
+      properties->maxBoundDescriptorSets = MAX_SETS;
+      properties->maxThreadGroupSize = 512;
       properties->timestampFrequency = 1000 * 1000 * 1000 / ns_per_tick;
-      properties->multiColorAttachmentClears = 0;
-      properties->maxDescriptorSets = 2;
+      properties->multiColorAttachmentClears = true;
+      properties->maxDescriptorSets = 8;
       properties->maxViewports = 16;
       properties->maxColorAttachments = 8;
       return VK_SUCCESS;
 
    case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE:
       performance = pData;
-      assert(*pDataSize >= sizeof(*performance));
-      *pDataSize = sizeof(*performance); /* Assuming we have to return the size of our struct. */
+
+      *pDataSize = sizeof(*performance);
+      if (pData == NULL)
+         return VK_SUCCESS;
 
       performance->maxDeviceClock = 1.0;
       performance->aluPerClock = 1.0;
@@ -223,25 +230,46 @@ VkResult VKAPI vkGetPhysicalDeviceInfo(
       
    case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES:
       queue_properties = pData;
-      assert(*pDataSize >= sizeof(*queue_properties));
+
       *pDataSize = sizeof(*queue_properties);
+      if (pData == NULL)
+         return VK_SUCCESS;
 
       queue_properties->queueFlags = 0;
       queue_properties->queueCount = 1;
       queue_properties->maxAtomicCounters = 0;
-      queue_properties->supportsTimestamps = 0;
-      queue_properties->maxMemReferences = 0;
+      queue_properties->supportsTimestamps = true;
+      queue_properties->maxMemReferences = 256;
       return VK_SUCCESS;
 
    case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES:
       memory_properties = pData;
-      assert(*pDataSize >= sizeof(*memory_properties));
+
       *pDataSize = sizeof(*memory_properties);
+      if (pData == NULL)
+         return VK_SUCCESS;
 
       memory_properties->supportsMigration = false;
       memory_properties->supportsPinning = false;
       return VK_SUCCESS;
 
+   case VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI:
+      anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI");
+
+      *pDataSize = sizeof(*display_properties);
+      if (pData == NULL)
+         return VK_SUCCESS;
+
+      display_properties = pData;
+      display_properties->display = 0;
+      display_properties->physicalResolution = (VkExtent2D) { 0, 0 };
+      return VK_SUCCESS;
+
+   case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI:
+      anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI");
+      return VK_SUCCESS;
+
+
    default:
       return VK_UNSUPPORTED;
    }
@@ -252,7 +280,7 @@ void * vkGetProcAddr(
     VkPhysicalDevice                            physicalDevice,
     const char*                                 pName)
 {
-   return NULL;
+   return anv_lookup_entrypoint(pName);
 }
 
 static void
@@ -275,7 +303,65 @@ parse_debug_flags(struct anv_device *device)
    }
 }
 
-VkResult VKAPI vkCreateDevice(
+static VkResult
+anv_queue_init(struct anv_device *device, struct anv_queue *queue)
+{
+   queue->device = device;
+   queue->pool = &device->surface_state_pool;
+
+   queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
+   if (queue->completed_serial.map == NULL)
+      return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   *(uint32_t *)queue->completed_serial.map = 0;
+   queue->next_serial = 1;
+
+   return VK_SUCCESS;
+}
+
+static void
+anv_queue_finish(struct anv_queue *queue)
+{
+#ifdef HAVE_VALGRIND
+   /* This gets torn down with the device so we only need to do this if
+    * valgrind is present.
+    */
+   anv_state_pool_free(queue->pool, queue->completed_serial);
+#endif
+}
+
+static void
+anv_device_init_border_colors(struct anv_device *device)
+{
+   float float_border_colors[][4] = {
+      [VK_BORDER_COLOR_OPAQUE_WHITE]            = { 1.0, 1.0, 1.0, 1.0 },
+      [VK_BORDER_COLOR_TRANSPARENT_BLACK]       = { 0.0, 0.0, 0.0, 0.0 },
+      [VK_BORDER_COLOR_OPAQUE_BLACK]            = { 0.0, 0.0, 0.0, 1.0 }
+   };
+
+   uint32_t uint32_border_colors[][4] = {
+      [VK_BORDER_COLOR_OPAQUE_WHITE]            = { 1, 1, 1, 1 },
+      [VK_BORDER_COLOR_TRANSPARENT_BLACK]       = { 0, 0, 0, 0 },
+      [VK_BORDER_COLOR_OPAQUE_BLACK]            = { 0, 0, 0, 1 }
+   };
+
+   device->float_border_colors =
+      anv_state_pool_alloc(&device->dynamic_state_pool,
+                           sizeof(float_border_colors), 32);
+   memcpy(device->float_border_colors.map,
+          float_border_colors, sizeof(float_border_colors));
+
+   device->uint32_border_colors =
+      anv_state_pool_alloc(&device->dynamic_state_pool,
+                           sizeof(uint32_border_colors), 32);
+   memcpy(device->uint32_border_colors.map,
+          uint32_border_colors, sizeof(uint32_border_colors));
+
+}
+
+static const uint32_t BATCH_SIZE = 8192;
+
+VkResult anv_CreateDevice(
     VkPhysicalDevice                            _physicalDevice,
     const VkDeviceCreateInfo*                   pCreateInfo,
     VkDevice*                                   pDevice)
@@ -305,6 +391,8 @@ VkResult VKAPI vkCreateDevice(
    if (device->context_id == -1)
       goto fail_fd;
 
+   anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE);
+
    anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048);
 
    anv_state_pool_init(&device->dynamic_state_pool,
@@ -316,15 +404,21 @@ VkResult VKAPI vkCreateDevice(
    anv_state_pool_init(&device->surface_state_pool,
                        &device->surface_state_block_pool);
 
-   device->compiler = anv_compiler_create(device->fd);
-   device->aub_writer = NULL;
+   anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
 
    device->info = *physicalDevice->info;
 
+   device->compiler = anv_compiler_create(device);
+   device->aub_writer = NULL;
+
    pthread_mutex_init(&device->mutex, NULL);
 
+   anv_queue_init(device, &device->queue);
+
    anv_device_init_meta(device);
 
+   anv_device_init_border_colors(device);
+
    *pDevice = (VkDevice) device;
 
    return VK_SUCCESS;
@@ -337,13 +431,28 @@ VkResult VKAPI vkCreateDevice(
    return vk_error(VK_ERROR_UNAVAILABLE);
 }
 
-VkResult VKAPI vkDestroyDevice(
+VkResult anv_DestroyDevice(
     VkDevice                                    _device)
 {
    struct anv_device *device = (struct anv_device *) _device;
 
    anv_compiler_destroy(device->compiler);
 
+   anv_queue_finish(&device->queue);
+
+   anv_device_finish_meta(device);
+
+#ifdef HAVE_VALGRIND
+   /* We only need to free these to prevent valgrind errors.  The backing
+    * BO will go away in a couple of lines so we don't actually leak.
+    */
+   anv_state_pool_free(&device->dynamic_state_pool,
+                       device->float_border_colors);
+   anv_state_pool_free(&device->dynamic_state_pool,
+                       device->uint32_border_colors);
+#endif
+
+   anv_bo_pool_finish(&device->batch_bo_pool);
    anv_block_pool_finish(&device->dynamic_state_block_pool);
    anv_block_pool_finish(&device->instruction_block_pool);
    anv_block_pool_finish(&device->surface_state_block_pool);
@@ -358,30 +467,40 @@ VkResult VKAPI vkDestroyDevice(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkGetGlobalExtensionInfo(
+VkResult anv_GetGlobalExtensionInfo(
     VkExtensionInfoType                         infoType,
     uint32_t                                    extensionIndex,
     size_t*                                     pDataSize,
     void*                                       pData)
 {
-   uint32_t *count;
+   static const VkExtensionProperties extensions[] = {
+      {
+         .extName = "VK_WSI_LunarG",
+         .version = 3
+      }
+   };
+   uint32_t count = ARRAY_SIZE(extensions);
 
    switch (infoType) {
    case VK_EXTENSION_INFO_TYPE_COUNT:
-      count = pData;
-      assert(*pDataSize == 4);
-      *count = 0;
+      memcpy(pData, &count, sizeof(count));
+      *pDataSize = sizeof(count);
       return VK_SUCCESS;
-      
+
    case VK_EXTENSION_INFO_TYPE_PROPERTIES:
-      return vk_error(VK_ERROR_INVALID_EXTENSION);
-      
+      if (extensionIndex >= count)
+         return vk_error(VK_ERROR_INVALID_EXTENSION);
+
+      memcpy(pData, &extensions[extensionIndex], sizeof(extensions[0]));
+      *pDataSize = sizeof(extensions[0]);
+      return VK_SUCCESS;
+
    default:
       return VK_UNSUPPORTED;
    }
 }
 
-VkResult VKAPI vkGetPhysicalDeviceExtensionInfo(
+VkResult anv_GetPhysicalDeviceExtensionInfo(
     VkPhysicalDevice                            physicalDevice,
     VkExtensionInfoType                         infoType,
     uint32_t                                    extensionIndex,
@@ -392,8 +511,11 @@ VkResult VKAPI vkGetPhysicalDeviceExtensionInfo(
 
    switch (infoType) {
    case VK_EXTENSION_INFO_TYPE_COUNT:
+      *pDataSize = 4;
+      if (pData == NULL)
+         return VK_SUCCESS;
+
       count = pData;
-      assert(*pDataSize == 4);
       *count = 0;
       return VK_SUCCESS;
       
@@ -405,7 +527,7 @@ VkResult VKAPI vkGetPhysicalDeviceExtensionInfo(
    }
 }
 
-VkResult VKAPI vkEnumerateLayers(
+VkResult anv_EnumerateLayers(
     VkPhysicalDevice                            physicalDevice,
     size_t                                      maxStringSize,
     size_t*                                     pLayerCount,
@@ -417,110 +539,180 @@ VkResult VKAPI vkEnumerateLayers(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkGetDeviceQueue(
+VkResult anv_GetDeviceQueue(
     VkDevice                                    _device,
     uint32_t                                    queueNodeIndex,
     uint32_t                                    queueIndex,
     VkQueue*                                    pQueue)
 {
    struct anv_device *device = (struct anv_device *) _device;
-   struct anv_queue *queue;
 
-   /* FIXME: Should allocate these at device create time. */
+   assert(queueIndex == 0);
 
-   queue = anv_device_alloc(device, sizeof(*queue), 8,
-                            VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
-   if (queue == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   *pQueue = (VkQueue) &device->queue;
 
-   queue->device = device;
-   queue->pool = &device->surface_state_pool;
+   return VK_SUCCESS;
+}
 
-   queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
-   *(uint32_t *)queue->completed_serial.map = 0;
-   queue->next_serial = 1;
+VkResult
+anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
+{
+   list->num_relocs = 0;
+   list->array_length = 256;
+   list->relocs =
+      anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
+                       VK_SYSTEM_ALLOC_TYPE_INTERNAL);
 
-   *pQueue = (VkQueue) queue;
+   if (list->relocs == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   list->reloc_bos =
+      anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
+                       VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+
+   if (list->reloc_bos == NULL) {
+      anv_device_free(device, list->relocs);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
 
    return VK_SUCCESS;
 }
 
-static const uint32_t BATCH_SIZE = 8192;
+void
+anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
+{
+   anv_device_free(device, list->relocs);
+   anv_device_free(device, list->reloc_bos);
+}
 
-VkResult
-anv_batch_init(struct anv_batch *batch, struct anv_device *device)
+static VkResult
+anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
+                    size_t num_additional_relocs)
+{
+   if (list->num_relocs + num_additional_relocs <= list->array_length)
+      return VK_SUCCESS;
+
+   size_t new_length = list->array_length * 2;
+   while (new_length < list->num_relocs + num_additional_relocs)
+      new_length *= 2;
+
+   struct drm_i915_gem_relocation_entry *new_relocs =
+      anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
+                       VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+   if (new_relocs == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   struct anv_bo **new_reloc_bos =
+      anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
+                       VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+   if (new_reloc_bos == NULL) {
+      anv_device_free(device, new_relocs);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
+   memcpy(new_reloc_bos, list->reloc_bos,
+          list->num_relocs * sizeof(*list->reloc_bos));
+
+   anv_device_free(device, list->relocs);
+   anv_device_free(device, list->reloc_bos);
+
+   list->relocs = new_relocs;
+   list->reloc_bos = new_reloc_bos;
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
 {
    VkResult result;
 
-   result = anv_bo_init_new(&batch->bo, device, BATCH_SIZE);
-   if (result != VK_SUCCESS)
-      return result;
+   struct anv_batch_bo *bbo =
+      anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+   if (bbo == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   batch->bo.map =
-      anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE);
-   if (batch->bo.map == NULL) {
-      anv_gem_close(device, batch->bo.gem_handle);
-      return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
+   bbo->num_relocs = 0;
+   bbo->prev_batch_bo = NULL;
+
+   result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
+   if (result != VK_SUCCESS) {
+      anv_device_free(device, bbo);
+      return result;
    }
 
-   batch->cmd_relocs.num_relocs = 0;
-   batch->surf_relocs.num_relocs = 0;
-   batch->next = batch->bo.map;
+   *bbo_out = bbo;
 
    return VK_SUCCESS;
 }
 
-void
-anv_batch_finish(struct anv_batch *batch, struct anv_device *device)
+static void
+anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
+                   size_t batch_padding)
 {
-   anv_gem_munmap(batch->bo.map, BATCH_SIZE);
-   anv_gem_close(device, batch->bo.gem_handle);
+   batch->next = batch->start = bbo->bo.map;
+   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
+   bbo->first_reloc = batch->relocs.num_relocs;
 }
 
-void
-anv_batch_reset(struct anv_batch *batch)
+static void
+anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
+{
+   assert(batch->start == bbo->bo.map);
+   bbo->length = batch->next - batch->start;
+   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
+   bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc;
+}
+
+static void
+anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
 {
-   batch->next = batch->bo.map;
-   batch->cmd_relocs.num_relocs = 0;
-   batch->surf_relocs.num_relocs = 0;
+   anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
+   anv_device_free(device, bbo);
 }
 
 void *
 anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
 {
+   if (batch->next + num_dwords * 4 > batch->end)
+      batch->extend_cb(batch, batch->user_data);
+
    void *p = batch->next;
 
    batch->next += num_dwords * 4;
+   assert(batch->next <= batch->end);
 
    return p;
 }
 
 static void
-anv_reloc_list_append(struct anv_reloc_list *list,
+anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
                       struct anv_reloc_list *other, uint32_t offset)
 {
-   uint32_t i, count;
+   anv_reloc_list_grow(list, device, other->num_relocs);
+   /* TODO: Handle failure */
 
-   count = list->num_relocs;
-   memcpy(&list->relocs[count], &other->relocs[0],
+   memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
           other->num_relocs * sizeof(other->relocs[0]));
-   memcpy(&list->reloc_bos[count], &other->reloc_bos[0],
+   memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
           other->num_relocs * sizeof(other->reloc_bos[0]));
-   for (i = 0; i < other->num_relocs; i++)
-      list->relocs[i + count].offset += offset;
 
-   count += other->num_relocs;
+   for (uint32_t i = 0; i < other->num_relocs; i++)
+      list->relocs[i + list->num_relocs].offset += offset;
+
+   list->num_relocs += other->num_relocs;
 }
 
 static uint64_t
-anv_reloc_list_add(struct anv_reloc_list *list,
-                   uint32_t offset,
-                   struct anv_bo *target_bo, uint32_t delta)
+anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
+                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
 {
    struct drm_i915_gem_relocation_entry *entry;
    int index;
 
-   assert(list->num_relocs < ANV_BATCH_MAX_RELOCS);
+   anv_reloc_list_grow(list, device, 1);
+   /* TODO: Handle failure */
 
    /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
    index = list->num_relocs++;
@@ -541,12 +733,19 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
 {
    uint32_t size, offset;
 
-   size = other->next - other->bo.map;
-   memcpy(batch->next, other->bo.map, size);
+   size = other->next - other->start;
+   assert(size % 4 == 0);
+
+   if (batch->next + size > batch->end)
+      batch->extend_cb(batch, batch->user_data);
+
+   assert(batch->next + size <= batch->end);
 
-   offset = batch->next - batch->bo.map;
-   anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset);
-   anv_reloc_list_append(&batch->surf_relocs, &other->surf_relocs, offset);
+   memcpy(batch->next, other->start, size);
+
+   offset = batch->next - batch->start;
+   anv_reloc_list_append(&batch->relocs, batch->device,
+                         &other->relocs, offset);
 
    batch->next += size;
 }
@@ -555,18 +754,19 @@ uint64_t
 anv_batch_emit_reloc(struct anv_batch *batch,
                      void *location, struct anv_bo *bo, uint32_t delta)
 {
-   return anv_reloc_list_add(&batch->cmd_relocs,
-                             location - batch->bo.map, bo, delta);
+   return anv_reloc_list_add(&batch->relocs, batch->device,
+                             location - batch->start, bo, delta);
 }
 
-VkResult VKAPI vkQueueSubmit(
+VkResult anv_QueueSubmit(
     VkQueue                                     _queue,
     uint32_t                                    cmdBufferCount,
     const VkCmdBuffer*                          pCmdBuffers,
-    VkFence                                     fence)
+    VkFence                                     _fence)
 {
    struct anv_queue *queue = (struct anv_queue *) _queue;
    struct anv_device *device = queue->device;
+   struct anv_fence *fence = (struct anv_fence *) _fence;
    int ret;
 
    for (uint32_t i = 0; i < cmdBufferCount; i++) {
@@ -581,6 +781,12 @@ VkResult VKAPI vkQueueSubmit(
          if (ret != 0)
             return vk_error(VK_ERROR_UNKNOWN);
 
+         if (fence) {
+            ret = anv_gem_execbuffer(device, &fence->execbuf);
+            if (ret != 0)
+               return vk_error(VK_ERROR_UNKNOWN);
+         }
+
          for (uint32_t i = 0; i < cmd_buffer->bo_count; i++)
             cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset;
       } else {
@@ -591,7 +797,7 @@ VkResult VKAPI vkQueueSubmit(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkQueueAddMemReferences(
+VkResult anv_QueueAddMemReferences(
     VkQueue                                     queue,
     uint32_t                                    count,
     const VkDeviceMemory*                       pMems)
@@ -599,7 +805,7 @@ VkResult VKAPI vkQueueAddMemReferences(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkQueueRemoveMemReferences(
+VkResult anv_QueueRemoveMemReferences(
     VkQueue                                     queue,
     uint32_t                                    count,
     const VkDeviceMemory*                       pMems)
@@ -607,7 +813,7 @@ VkResult VKAPI vkQueueRemoveMemReferences(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkQueueWaitIdle(
+VkResult anv_QueueWaitIdle(
     VkQueue                                     _queue)
 {
    struct anv_queue *queue = (struct anv_queue *) _queue;
@@ -615,7 +821,7 @@ VkResult VKAPI vkQueueWaitIdle(
    return vkDeviceWaitIdle((VkDevice) queue->device);
 }
 
-VkResult VKAPI vkDeviceWaitIdle(
+VkResult anv_DeviceWaitIdle(
     VkDevice                                    _device)
 {
    struct anv_device *device = (struct anv_device *) _device;
@@ -630,7 +836,8 @@ VkResult VKAPI vkDeviceWaitIdle(
 
    state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
    bo = &device->dynamic_state_pool.block_pool->bo;
-   batch.next = state.map;
+   batch.start = batch.next = state.map;
+   batch.end = state.map + 32;
    anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
    anv_batch_emit(&batch, GEN8_MI_NOOP);
 
@@ -717,7 +924,7 @@ anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkAllocMemory(
+VkResult anv_AllocMemory(
     VkDevice                                    _device,
     const VkMemoryAllocInfo*                    pAllocInfo,
     VkDeviceMemory*                             pMem)
@@ -747,7 +954,7 @@ VkResult VKAPI vkAllocMemory(
    return result;
 }
 
-VkResult VKAPI vkFreeMemory(
+VkResult anv_FreeMemory(
     VkDevice                                    _device,
     VkDeviceMemory                              _mem)
 {
@@ -765,7 +972,7 @@ VkResult VKAPI vkFreeMemory(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkSetMemoryPriority(
+VkResult anv_SetMemoryPriority(
     VkDevice                                    device,
     VkDeviceMemory                              mem,
     VkMemoryPriority                            priority)
@@ -773,7 +980,7 @@ VkResult VKAPI vkSetMemoryPriority(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkMapMemory(
+VkResult anv_MapMemory(
     VkDevice                                    _device,
     VkDeviceMemory                              _mem,
     VkDeviceSize                                offset,
@@ -798,7 +1005,7 @@ VkResult VKAPI vkMapMemory(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkUnmapMemory(
+VkResult anv_UnmapMemory(
     VkDevice                                    _device,
     VkDeviceMemory                              _mem)
 {
@@ -809,7 +1016,7 @@ VkResult VKAPI vkUnmapMemory(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkFlushMappedMemory(
+VkResult anv_FlushMappedMemory(
     VkDevice                                    device,
     VkDeviceMemory                              mem,
     VkDeviceSize                                offset,
@@ -820,7 +1027,7 @@ VkResult VKAPI vkFlushMappedMemory(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkPinSystemMemory(
+VkResult anv_PinSystemMemory(
     VkDevice                                    device,
     const void*                                 pSysMem,
     size_t                                      memSize,
@@ -829,7 +1036,7 @@ VkResult VKAPI vkPinSystemMemory(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkGetMultiDeviceCompatibility(
+VkResult anv_GetMultiDeviceCompatibility(
     VkPhysicalDevice                            physicalDevice0,
     VkPhysicalDevice                            physicalDevice1,
     VkPhysicalDeviceCompatibilityInfo*          pInfo)
@@ -837,7 +1044,7 @@ VkResult VKAPI vkGetMultiDeviceCompatibility(
    return VK_UNSUPPORTED;
 }
 
-VkResult VKAPI vkOpenSharedMemory(
+VkResult anv_OpenSharedMemory(
     VkDevice                                    device,
     const VkMemoryOpenInfo*                     pOpenInfo,
     VkDeviceMemory*                             pMem)
@@ -845,7 +1052,7 @@ VkResult VKAPI vkOpenSharedMemory(
    return VK_UNSUPPORTED;
 }
 
-VkResult VKAPI vkOpenSharedSemaphore(
+VkResult anv_OpenSharedSemaphore(
     VkDevice                                    device,
     const VkSemaphoreOpenInfo*                  pOpenInfo,
     VkSemaphore*                                pSemaphore)
@@ -853,7 +1060,7 @@ VkResult VKAPI vkOpenSharedSemaphore(
    return VK_UNSUPPORTED;
 }
 
-VkResult VKAPI vkOpenPeerMemory(
+VkResult anv_OpenPeerMemory(
     VkDevice                                    device,
     const VkPeerMemoryOpenInfo*                 pOpenInfo,
     VkDeviceMemory*                             pMem)
@@ -861,7 +1068,7 @@ VkResult VKAPI vkOpenPeerMemory(
    return VK_UNSUPPORTED;
 }
 
-VkResult VKAPI vkOpenPeerImage(
+VkResult anv_OpenPeerImage(
     VkDevice                                    device,
     const VkPeerImageOpenInfo*                  pOpenInfo,
     VkImage*                                    pImage,
@@ -870,86 +1077,72 @@ VkResult VKAPI vkOpenPeerImage(
    return VK_UNSUPPORTED;
 }
 
-static VkResult
-anv_instance_destructor(struct anv_device *     device,
-                        VkObject                object)
-{
-   return vkDestroyInstance(object);
-}
-
-static VkResult
-anv_noop_destructor(struct anv_device *         device,
-                    VkObject                    object)
+VkResult anv_DestroyObject(
+    VkDevice                                    _device,
+    VkObjectType                                objType,
+    VkObject                                    _object)
 {
-   return VK_SUCCESS;
-}
+   struct anv_device *device = (struct anv_device *) _device;
+   struct anv_object *object = (struct anv_object *) _object;
 
-static VkResult
-anv_device_destructor(struct anv_device *       device,
-                      VkObject                  object)
-{
-   return vkDestroyDevice(object);
-}
+   switch (objType) {
+   case VK_OBJECT_TYPE_INSTANCE:
+      return anv_DestroyInstance((VkInstance) _object);
 
-static VkResult
-anv_cmd_buffer_destructor(struct anv_device *   device,
-                          VkObject              object)
-{
-   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
-   
-   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
-   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
-   anv_batch_finish(&cmd_buffer->batch, device);
-   anv_device_free(device, cmd_buffer->exec2_objects);
-   anv_device_free(device, cmd_buffer->exec2_bos);
-   anv_device_free(device, cmd_buffer);
+   case VK_OBJECT_TYPE_PHYSICAL_DEVICE:
+      /* We don't want to actually destroy physical devices */
+      return VK_SUCCESS;
 
-   return VK_SUCCESS;
-}
+   case VK_OBJECT_TYPE_DEVICE:
+      assert(_device == (VkDevice) _object);
+      return anv_DestroyDevice((VkDevice) _object);
 
-static VkResult
-anv_pipeline_destructor(struct anv_device *   device,
-                        VkObject              object)
-{
-   struct anv_pipeline *pipeline = (struct anv_pipeline *) object;
+   case VK_OBJECT_TYPE_QUEUE:
+      /* TODO */
+      return VK_SUCCESS;
 
-   return anv_pipeline_destroy(pipeline);
-}
+   case VK_OBJECT_TYPE_DEVICE_MEMORY:
+      return anv_FreeMemory(_device, (VkDeviceMemory) _object);
 
-static VkResult
-anv_free_destructor(struct anv_device *         device,
-                    VkObject                    object)
-{
-   anv_device_free(device, (void *) object);
+   case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
+      /* These are just dummies anyway, so we don't need to destroy them */
+      return VK_SUCCESS;
 
-   return VK_SUCCESS;
-}
+   case VK_OBJECT_TYPE_BUFFER:
+   case VK_OBJECT_TYPE_IMAGE:
+   case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW:
+   case VK_OBJECT_TYPE_SHADER:
+   case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
+   case VK_OBJECT_TYPE_SAMPLER:
+   case VK_OBJECT_TYPE_DESCRIPTOR_SET:
+   case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT:
+   case VK_OBJECT_TYPE_DYNAMIC_RS_STATE:
+   case VK_OBJECT_TYPE_DYNAMIC_CB_STATE:
+   case VK_OBJECT_TYPE_DYNAMIC_DS_STATE:
+   case VK_OBJECT_TYPE_RENDER_PASS:
+      /* These are trivially destroyable */
+      anv_device_free(device, (void *) _object);
+      return VK_SUCCESS;
 
-static VkResult (*anv_object_destructors[])(struct anv_device *device,
-                                            VkObject object) = {
-   [VK_OBJECT_TYPE_INSTANCE] =        anv_instance_destructor,
-   [VK_OBJECT_TYPE_PHYSICAL_DEVICE] = anv_noop_destructor,
-   [VK_OBJECT_TYPE_DEVICE] =          anv_device_destructor,
-   [VK_OBJECT_TYPE_QUEUE] =           anv_noop_destructor,
-   [VK_OBJECT_TYPE_COMMAND_BUFFER] =  anv_cmd_buffer_destructor,
-   [VK_OBJECT_TYPE_PIPELINE] =        anv_pipeline_destructor,
-   [VK_OBJECT_TYPE_SHADER] =          anv_free_destructor,
-   [VK_OBJECT_TYPE_BUFFER] =          anv_free_destructor,
-   [VK_OBJECT_TYPE_IMAGE] =           anv_free_destructor,
-   [VK_OBJECT_TYPE_RENDER_PASS] =     anv_free_destructor
-};
+   case VK_OBJECT_TYPE_COMMAND_BUFFER:
+   case VK_OBJECT_TYPE_PIPELINE:
+   case VK_OBJECT_TYPE_DYNAMIC_VP_STATE:
+   case VK_OBJECT_TYPE_FENCE:
+   case VK_OBJECT_TYPE_QUERY_POOL:
+   case VK_OBJECT_TYPE_FRAMEBUFFER:
+   case VK_OBJECT_TYPE_BUFFER_VIEW:
+   case VK_OBJECT_TYPE_IMAGE_VIEW:
+   case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW:
+      (object->destructor)(device, object, objType);
+      return VK_SUCCESS;
 
-VkResult VKAPI vkDestroyObject(
-    VkDevice                                    _device,
-    VkObjectType                                objType,
-    VkObject                                    object)
-{
-   struct anv_device *device = (struct anv_device *) _device;
+   case VK_OBJECT_TYPE_SEMAPHORE:
+   case VK_OBJECT_TYPE_EVENT:
+      stub_return(VK_UNSUPPORTED);
 
-   assert(objType < ARRAY_SIZE(anv_object_destructors) &&
-          anv_object_destructors[objType] != NULL);
-      
-   return anv_object_destructors[objType](device, object);
+   default:
+      unreachable("Invalid object type");
+   }
 }
 
 static void
@@ -988,7 +1181,19 @@ fill_memory_requirements(
    }
 }
 
-VkResult VKAPI vkGetObjectInfo(
+static uint32_t
+get_allocation_count(VkObjectType objType)
+{
+   switch (objType) {
+   case VK_OBJECT_TYPE_BUFFER:
+   case VK_OBJECT_TYPE_IMAGE:
+      return 1;
+   default:
+      return 0;
+   }
+}
+
+VkResult anv_GetObjectInfo(
     VkDevice                                    _device,
     VkObjectType                                objType,
     VkObject                                    object,
@@ -997,23 +1202,33 @@ VkResult VKAPI vkGetObjectInfo(
     void*                                       pData)
 {
    VkMemoryRequirements memory_requirements;
+   uint32_t *count;
 
    switch (infoType) {
    case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS:
-      fill_memory_requirements(objType, object, &memory_requirements);
-      memcpy(pData, &memory_requirements,
-             MIN2(*pDataSize, sizeof(memory_requirements)));
       *pDataSize = sizeof(memory_requirements);
+      if (pData == NULL)
+         return VK_SUCCESS;
+
+      fill_memory_requirements(objType, object, pData);
       return VK_SUCCESS;
 
    case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT:
+      *pDataSize = sizeof(count);
+      if (pData == NULL)
+         return VK_SUCCESS;
+
+      count = pData;
+      *count = get_allocation_count(objType);
+      return VK_SUCCESS;
+
    default:
-      return VK_UNSUPPORTED;
+      return vk_error(VK_UNSUPPORTED);
    }
 
 }
 
-VkResult VKAPI vkQueueBindObjectMemory(
+VkResult anv_QueueBindObjectMemory(
     VkQueue                                     queue,
     VkObjectType                                objType,
     VkObject                                    object,
@@ -1043,7 +1258,7 @@ VkResult VKAPI vkQueueBindObjectMemory(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkQueueBindObjectMemoryRange(
+VkResult anv_QueueBindObjectMemoryRange(
     VkQueue                                     queue,
     VkObjectType                                objType,
     VkObject                                    object,
@@ -1056,7 +1271,7 @@ VkResult VKAPI vkQueueBindObjectMemoryRange(
    stub_return(VK_UNSUPPORTED);
 }
 
-VkResult vkQueueBindImageMemoryRange(
+VkResult anv_QueueBindImageMemoryRange(
     VkQueue                                     queue,
     VkImage                                     image,
     uint32_t                                    allocationIdx,
@@ -1067,42 +1282,147 @@ VkResult vkQueueBindImageMemoryRange(
    stub_return(VK_UNSUPPORTED);
 }
 
-VkResult VKAPI vkCreateFence(
-    VkDevice                                    device,
+static void
+anv_fence_destroy(struct anv_device *device,
+                  struct anv_object *object,
+                  VkObjectType obj_type)
+{
+   struct anv_fence *fence = (struct anv_fence *) object;
+
+   assert(obj_type == VK_OBJECT_TYPE_FENCE);
+
+   anv_gem_munmap(fence->bo.map, fence->bo.size);
+   anv_gem_close(device, fence->bo.gem_handle);
+   anv_device_free(device, fence);
+}
+
+VkResult anv_CreateFence(
+    VkDevice                                    _device,
     const VkFenceCreateInfo*                    pCreateInfo,
     VkFence*                                    pFence)
 {
-   stub_return(VK_UNSUPPORTED);
+   struct anv_device *device = (struct anv_device *) _device;
+   struct anv_fence *fence;
+   struct anv_batch batch;
+   VkResult result;
+
+   const uint32_t fence_size = 128;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
+
+   fence = anv_device_alloc(device, sizeof(*fence), 8,
+                            VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+   if (fence == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = anv_bo_init_new(&fence->bo, device, fence_size);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   fence->base.destructor = anv_fence_destroy;
+
+   fence->bo.map =
+      anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size);
+   batch.next = batch.start = fence->bo.map;
+   batch.end = fence->bo.map + fence->bo.size;
+   anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
+   anv_batch_emit(&batch, GEN8_MI_NOOP);
+
+   fence->exec2_objects[0].handle = fence->bo.gem_handle;
+   fence->exec2_objects[0].relocation_count = 0;
+   fence->exec2_objects[0].relocs_ptr = 0;
+   fence->exec2_objects[0].alignment = 0;
+   fence->exec2_objects[0].offset = fence->bo.offset;
+   fence->exec2_objects[0].flags = 0;
+   fence->exec2_objects[0].rsvd1 = 0;
+   fence->exec2_objects[0].rsvd2 = 0;
+
+   fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
+   fence->execbuf.buffer_count = 1;
+   fence->execbuf.batch_start_offset = 0;
+   fence->execbuf.batch_len = batch.next - fence->bo.map;
+   fence->execbuf.cliprects_ptr = 0;
+   fence->execbuf.num_cliprects = 0;
+   fence->execbuf.DR1 = 0;
+   fence->execbuf.DR4 = 0;
+
+   fence->execbuf.flags =
+      I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
+   fence->execbuf.rsvd1 = device->context_id;
+   fence->execbuf.rsvd2 = 0;
+
+   *pFence = (VkFence) fence;
+
+   return VK_SUCCESS;
+
+ fail:
+   anv_device_free(device, fence);
+
+   return result;
 }
 
-VkResult VKAPI vkResetFences(
-    VkDevice                                    device,
+VkResult anv_ResetFences(
+    VkDevice                                    _device,
     uint32_t                                    fenceCount,
     VkFence*                                    pFences)
 {
-   stub_return(VK_UNSUPPORTED);
+   struct anv_fence **fences = (struct anv_fence **) pFences;
+
+   for (uint32_t i = 0; i < fenceCount; i++)
+      fences[i]->ready = false;
+
+   return VK_SUCCESS;
 }
 
-VkResult VKAPI vkGetFenceStatus(
-    VkDevice                                    device,
-    VkFence                                     fence)
+VkResult anv_GetFenceStatus(
+    VkDevice                                    _device,
+    VkFence                                     _fence)
 {
-   stub_return(VK_UNSUPPORTED);
+   struct anv_device *device = (struct anv_device *) _device;
+   struct anv_fence *fence = (struct anv_fence *) _fence;
+   int64_t t = 0;
+   int ret;
+
+   if (fence->ready)
+      return VK_SUCCESS;
+
+   ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+   if (ret == 0) {
+      fence->ready = true;
+      return VK_SUCCESS;
+   }
+
+   return VK_NOT_READY;
 }
 
-VkResult VKAPI vkWaitForFences(
-    VkDevice                                    device,
+VkResult anv_WaitForFences(
+    VkDevice                                    _device,
     uint32_t                                    fenceCount,
     const VkFence*                              pFences,
     bool32_t                                    waitAll,
     uint64_t                                    timeout)
 {
-   stub_return(VK_UNSUPPORTED);
+   struct anv_device *device = (struct anv_device *) _device;
+   struct anv_fence **fences = (struct anv_fence **) pFences;
+   int64_t t = timeout;
+   int ret;
+
+   /* FIXME: handle !waitAll */
+
+   for (uint32_t i = 0; i < fenceCount; i++) {
+      ret = anv_gem_wait(device, fences[i]->bo.gem_handle, &t);
+      if (ret == -1 && errno == ETIME)
+         return VK_TIMEOUT;
+      else if (ret == -1)
+         return vk_error(VK_ERROR_UNKNOWN);
+   }
+
+   return VK_SUCCESS;
 }
 
 // Queue semaphore functions
 
-VkResult VKAPI vkCreateSemaphore(
+VkResult anv_CreateSemaphore(
     VkDevice                                    device,
     const VkSemaphoreCreateInfo*                pCreateInfo,
     VkSemaphore*                                pSemaphore)
@@ -1110,14 +1430,14 @@ VkResult VKAPI vkCreateSemaphore(
    stub_return(VK_UNSUPPORTED);
 }
 
-VkResult VKAPI vkQueueSignalSemaphore(
+VkResult anv_QueueSignalSemaphore(
     VkQueue                                     queue,
     VkSemaphore                                 semaphore)
 {
    stub_return(VK_UNSUPPORTED);
 }
 
-VkResult VKAPI vkQueueWaitSemaphore(
+VkResult anv_QueueWaitSemaphore(
     VkQueue                                     queue,
     VkSemaphore                                 semaphore)
 {
@@ -1126,7 +1446,7 @@ VkResult VKAPI vkQueueWaitSemaphore(
 
 // Event functions
 
-VkResult VKAPI vkCreateEvent(
+VkResult anv_CreateEvent(
     VkDevice                                    device,
     const VkEventCreateInfo*                    pCreateInfo,
     VkEvent*                                    pEvent)
@@ -1134,99 +1454,36 @@ VkResult VKAPI vkCreateEvent(
    stub_return(VK_UNSUPPORTED);
 }
 
-VkResult VKAPI vkGetEventStatus(
+VkResult anv_GetEventStatus(
     VkDevice                                    device,
     VkEvent                                     event)
 {
    stub_return(VK_UNSUPPORTED);
 }
 
-VkResult VKAPI vkSetEvent(
+VkResult anv_SetEvent(
     VkDevice                                    device,
     VkEvent                                     event)
 {
    stub_return(VK_UNSUPPORTED);
 }
 
-VkResult VKAPI vkResetEvent(
+VkResult anv_ResetEvent(
     VkDevice                                    device,
     VkEvent                                     event)
 {
    stub_return(VK_UNSUPPORTED);
 }
 
-// Query functions
-
-struct anv_query_pool {
-   VkQueryType                                 type;
-   uint32_t                                    slots;
-   struct anv_bo bo;
-};
+// Buffer functions
 
-VkResult VKAPI vkCreateQueryPool(
+VkResult anv_CreateBuffer(
     VkDevice                                    _device,
-    const VkQueryPoolCreateInfo*                pCreateInfo,
-    VkQueryPool*                                pQueryPool)
+    const VkBufferCreateInfo*                   pCreateInfo,
+    VkBuffer*                                   pBuffer)
 {
    struct anv_device *device = (struct anv_device *) _device;
-   struct anv_query_pool *pool;
-   VkResult result;
-
-   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
-   
-   pool = anv_device_alloc(device, sizeof(*pool), 8,
-                            VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
-   if (pool == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   pool->type = pCreateInfo->queryType;
-   result = anv_bo_init_new(&pool->bo, device, pCreateInfo->slots * 16);
-   if (result != VK_SUCCESS)
-      goto fail;
-
-   *pQueryPool = (VkQueryPool) pool;
-
-   return VK_SUCCESS;
-
- fail:
-   anv_device_free(device, pool);
-
-   return result;
-}
-
-VkResult VKAPI vkGetQueryPoolResults(
-    VkDevice                                    device,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    startQuery,
-    uint32_t                                    queryCount,
-    size_t*                                     pDataSize,
-    void*                                       pData,
-    VkQueryResultFlags                          flags)
-{
-   stub_return(VK_UNSUPPORTED);
-}
-
-// Format capabilities
-
-VkResult VKAPI vkGetFormatInfo(
-    VkDevice                                    device,
-    VkFormat                                    format,
-    VkFormatInfoType                            infoType,
-    size_t*                                     pDataSize,
-    void*                                       pData)
-{
-   stub_return(VK_UNSUPPORTED);
-}
-
-// Buffer functions
-
-VkResult VKAPI vkCreateBuffer(
-    VkDevice                                    _device,
-    const VkBufferCreateInfo*                   pCreateInfo,
-    VkBuffer*                                   pBuffer)
-{
-   struct anv_device *device = (struct anv_device *) _device;
-   struct anv_buffer *buffer;
+   struct anv_buffer *buffer;
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
 
@@ -1246,37 +1503,21 @@ VkResult VKAPI vkCreateBuffer(
 
 // Buffer view functions
 
-VkResult VKAPI vkCreateBufferView(
-    VkDevice                                    _device,
-    const VkBufferViewCreateInfo*               pCreateInfo,
-    VkBufferView*                               pView)
+static void
+fill_buffer_surface_state(void *state, VkFormat format,
+                          uint32_t offset, uint32_t range)
 {
-   struct anv_device *device = (struct anv_device *) _device;
-   struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer;
-   struct anv_surface_view *view;
-   const struct anv_format *format;
-
-   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
-
-   view = anv_device_alloc(device, sizeof(*view), 8,
-                           VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
-   if (view == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   view->bo = buffer->bo;
-   view->offset = buffer->offset + pCreateInfo->offset;
-   view->surface_state =
-      anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
-   view->format = pCreateInfo->format;
+   const struct anv_format *info;
 
-   format = anv_format_for_vk_format(pCreateInfo->format);
+   info = anv_format_for_vk_format(format);
    /* This assumes RGBA float format. */
    uint32_t stride = 4;
-   uint32_t num_elements = pCreateInfo->range / stride;
+   uint32_t num_elements = range / stride;
+
    struct GEN8_RENDER_SURFACE_STATE surface_state = {
       .SurfaceType = SURFTYPE_BUFFER,
       .SurfaceArray = false,
-      .SurfaceFormat = format->format,
+      .SurfaceFormat = info->surface_format,
       .SurfaceVerticalAlignment = VALIGN4,
       .SurfaceHorizontalAlignment = HALIGN4,
       .TileMode = LINEAR,
@@ -1284,8 +1525,8 @@ VkResult VKAPI vkCreateBufferView(
       .VerticalLineStrideOffset = 0,
       .SamplerL2BypassModeDisable = true,
       .RenderCacheReadWriteMode = WriteOnlyCache,
-      .MemoryObjectControlState = 0, /* FIXME: MOCS */
-      .BaseMipLevel = 0,
+      .MemoryObjectControlState = GEN8_MOCS,
+      .BaseMipLevel = 0.0,
       .SurfaceQPitch = 0,
       .Height = (num_elements >> 7) & 0x3fff,
       .Width = num_elements & 0x7f,
@@ -1306,27 +1547,57 @@ VkResult VKAPI vkCreateBufferView(
       .ShaderChannelSelectGreen = SCS_GREEN,
       .ShaderChannelSelectBlue = SCS_BLUE,
       .ShaderChannelSelectAlpha = SCS_ALPHA,
-      .ResourceMinLOD = 0,
+      .ResourceMinLOD = 0.0,
       /* FIXME: We assume that the image must be bound at this time. */
-      .SurfaceBaseAddress = { NULL, view->offset },
+      .SurfaceBaseAddress = { NULL, offset },
    };
 
-   GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state);
+   GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state);
+}
+
+VkResult anv_CreateBufferView(
+    VkDevice                                    _device,
+    const VkBufferViewCreateInfo*               pCreateInfo,
+    VkBufferView*                               pView)
+{
+   struct anv_device *device = (struct anv_device *) _device;
+   struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer;
+   struct anv_surface_view *view;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
+
+   view = anv_device_alloc(device, sizeof(*view), 8,
+                           VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+   if (view == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   view->base.destructor = anv_surface_view_destroy;
+
+   view->bo = buffer->bo;
+   view->offset = buffer->offset + pCreateInfo->offset;
+   view->surface_state =
+      anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
+   view->format = pCreateInfo->format;
+   view->range = pCreateInfo->range;
+
+   fill_buffer_surface_state(view->surface_state.map,
+                             pCreateInfo->format, view->offset, pCreateInfo->range);
 
-   *pView = (VkImageView) view;
+   *pView = (VkBufferView) view;
 
    return VK_SUCCESS;
 }
 
 // Sampler functions
 
-VkResult VKAPI vkCreateSampler(
+VkResult anv_CreateSampler(
     VkDevice                                    _device,
     const VkSamplerCreateInfo*                  pCreateInfo,
     VkSampler*                                  pSampler)
 {
    struct anv_device *device = (struct anv_device *) _device;
    struct anv_sampler *sampler;
+   uint32_t mag_filter, min_filter, max_anisotropy;
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
 
@@ -1336,58 +1607,69 @@ VkResult VKAPI vkCreateSampler(
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
    static const uint32_t vk_to_gen_tex_filter[] = {
-      [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
-      [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
+      [VK_TEX_FILTER_NEAREST]                   = MAPFILTER_NEAREST,
+      [VK_TEX_FILTER_LINEAR]                    = MAPFILTER_LINEAR
    };
 
    static const uint32_t vk_to_gen_mipmap_mode[] = {
-      [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
-      [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
-      [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
+      [VK_TEX_MIPMAP_MODE_BASE]                 = MIPFILTER_NONE,
+      [VK_TEX_MIPMAP_MODE_NEAREST]              = MIPFILTER_NEAREST,
+      [VK_TEX_MIPMAP_MODE_LINEAR]               = MIPFILTER_LINEAR
    };
 
    static const uint32_t vk_to_gen_tex_address[] = {
-      [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
-      [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
-      [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
-      [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
-      [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
+      [VK_TEX_ADDRESS_WRAP]                     = TCM_WRAP,
+      [VK_TEX_ADDRESS_MIRROR]                   = TCM_MIRROR,
+      [VK_TEX_ADDRESS_CLAMP]                    = TCM_CLAMP,
+      [VK_TEX_ADDRESS_MIRROR_ONCE]              = TCM_MIRROR_ONCE,
+      [VK_TEX_ADDRESS_CLAMP_BORDER]             = TCM_CLAMP_BORDER,
    };
 
    static const uint32_t vk_to_gen_compare_op[] = {
-      [VK_COMPARE_OP_NEVER]  = PREFILTEROPNEVER,
-      [VK_COMPARE_OP_LESS]  = PREFILTEROPLESS,
-      [VK_COMPARE_OP_EQUAL]  = PREFILTEROPEQUAL,
-      [VK_COMPARE_OP_LESS_EQUAL]  = PREFILTEROPLEQUAL,
-      [VK_COMPARE_OP_GREATER]  = PREFILTEROPGREATER,
-      [VK_COMPARE_OP_NOT_EQUAL]  = PREFILTEROPNOTEQUAL,
-      [VK_COMPARE_OP_GREATER_EQUAL]  = PREFILTEROPGEQUAL,
-      [VK_COMPARE_OP_ALWAYS]  = PREFILTEROPALWAYS,
+      [VK_COMPARE_OP_NEVER]                     = PREFILTEROPNEVER,
+      [VK_COMPARE_OP_LESS]                      = PREFILTEROPLESS,
+      [VK_COMPARE_OP_EQUAL]                     = PREFILTEROPEQUAL,
+      [VK_COMPARE_OP_LESS_EQUAL]                = PREFILTEROPLEQUAL,
+      [VK_COMPARE_OP_GREATER]                   = PREFILTEROPGREATER,
+      [VK_COMPARE_OP_NOT_EQUAL]                 = PREFILTEROPNOTEQUAL,
+      [VK_COMPARE_OP_GREATER_EQUAL]             = PREFILTEROPGEQUAL,
+      [VK_COMPARE_OP_ALWAYS]                    = PREFILTEROPALWAYS,
    };
 
-   if (pCreateInfo->maxAnisotropy > 0)
-       anv_finishme("missing support for anisotropic filtering");
-
+   if (pCreateInfo->maxAnisotropy > 1) {
+      mag_filter = MAPFILTER_ANISOTROPIC;
+      min_filter = MAPFILTER_ANISOTROPIC;
+      max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2;
+   } else {
+      mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter];
+      min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter];
+      max_anisotropy = RATIO21;
+   }
+
    struct GEN8_SAMPLER_STATE sampler_state = {
       .SamplerDisable = false,
       .TextureBorderColorMode = DX10OGL,
       .LODPreClampMode = 0,
-      .BaseMipLevel = 0,
+      .BaseMipLevel = 0.0,
       .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode],
-      .MagModeFilter = vk_to_gen_tex_filter[pCreateInfo->magFilter],
-      .MinModeFilter = vk_to_gen_tex_filter[pCreateInfo->minFilter],
+      .MagModeFilter = mag_filter,
+      .MinModeFilter = min_filter,
       .TextureLODBias = pCreateInfo->mipLodBias * 256,
       .AnisotropicAlgorithm = EWAApproximation,
-      .MinLOD = pCreateInfo->minLod * 256,
-      .MaxLOD = pCreateInfo->maxLod * 256,
+      .MinLOD = pCreateInfo->minLod,
+      .MaxLOD = pCreateInfo->maxLod,
       .ChromaKeyEnable = 0,
       .ChromaKeyIndex = 0,
       .ChromaKeyMode = 0,
       .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp],
       .CubeSurfaceControlMode = 0,
-      .IndirectStatePointer = 0,
+
+      .IndirectStatePointer =
+         device->float_border_colors.offset +
+         pCreateInfo->borderColor * sizeof(float) * 4,
+
       .LODClampMagnificationMode = MIPNONE,
-      .MaximumAnisotropy = 0,
+      .MaximumAnisotropy = max_anisotropy,
       .RAddressMinFilterRoundingEnable = 0,
       .RAddressMagFilterRoundingEnable = 0,
       .VAddressMinFilterRoundingEnable = 0,
@@ -1410,7 +1692,7 @@ VkResult VKAPI vkCreateSampler(
 
 // Descriptor set functions
 
-VkResult VKAPI vkCreateDescriptorSetLayout(
+VkResult anv_CreateDescriptorSetLayout(
     VkDevice                                    _device,
     const VkDescriptorSetLayoutCreateInfo*      pCreateInfo,
     VkDescriptorSetLayout*                      pSetLayout)
@@ -1424,21 +1706,22 @@ VkResult VKAPI vkCreateDescriptorSetLayout(
    uint32_t surface_count[VK_NUM_SHADER_STAGE] = { 0, };
    uint32_t num_dynamic_buffers = 0;
    uint32_t count = 0;
+   uint32_t stages = 0;
    uint32_t s;
 
    for (uint32_t i = 0; i < pCreateInfo->count; i++) {
       switch (pCreateInfo->pBinding[i].descriptorType) {
       case VK_DESCRIPTOR_TYPE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
          for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
             sampler_count[s] += pCreateInfo->pBinding[i].count;
          break;
+      default:
+         break;
+      }
 
+      switch (pCreateInfo->pBinding[i].descriptorType) {
       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-         for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
-            sampler_count[s] += pCreateInfo->pBinding[i].count;
-
-         /* fall through */
-
       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
@@ -1454,18 +1737,17 @@ VkResult VKAPI vkCreateDescriptorSetLayout(
          break;
       }
 
-      count += pCreateInfo->pBinding[i].count;
-   }
-
-   for (uint32_t i = 0; i < pCreateInfo->count; i++) {
       switch (pCreateInfo->pBinding[i].descriptorType) {
       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-         num_dynamic_buffers++;
+         num_dynamic_buffers += pCreateInfo->pBinding[i].count;
          break;
       default:
          break;
       }
+
+      stages |= pCreateInfo->pBinding[i].stageFlags;
+      count += pCreateInfo->pBinding[i].count;
    }
 
    uint32_t sampler_total = 0;
@@ -1476,7 +1758,7 @@ VkResult VKAPI vkCreateDescriptorSetLayout(
    }
 
    size_t size = sizeof(*set_layout) +
-      (sampler_total + surface_total) * sizeof(uint32_t);
+      (sampler_total + surface_total) * sizeof(set_layout->entries[0]);
    set_layout = anv_device_alloc(device, size, 8,
                                  VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
    if (!set_layout)
@@ -1484,10 +1766,11 @@ VkResult VKAPI vkCreateDescriptorSetLayout(
 
    set_layout->num_dynamic_buffers = num_dynamic_buffers;
    set_layout->count = count;
+   set_layout->shader_stages = stages;
 
-   uint32_t *p = set_layout->entries;
-   uint32_t *sampler[VK_NUM_SHADER_STAGE];
-   uint32_t *surface[VK_NUM_SHADER_STAGE];
+   struct anv_descriptor_slot *p = set_layout->entries;
+   struct anv_descriptor_slot *sampler[VK_NUM_SHADER_STAGE];
+   struct anv_descriptor_slot *surface[VK_NUM_SHADER_STAGE];
    for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
       set_layout->stage[s].surface_count = surface_count[s];
       set_layout->stage[s].surface_start = surface[s] = p;
@@ -1498,21 +1781,35 @@ VkResult VKAPI vkCreateDescriptorSetLayout(
    }
 
    uint32_t descriptor = 0;
+   int8_t dynamic_slot = 0;
+   bool is_dynamic;
    for (uint32_t i = 0; i < pCreateInfo->count; i++) {
       switch (pCreateInfo->pBinding[i].descriptorType) {
       case VK_DESCRIPTOR_TYPE_SAMPLER:
-         for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
-            for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++)
-               *(sampler[s])++ = descriptor + j;
-         break;
-
       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
          for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
-            for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++)
-               *(sampler[s])++ = descriptor + j;
+            for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
+               sampler[s]->index = descriptor + j;
+               sampler[s]->dynamic_slot = -1;
+               sampler[s]++;
+            }
+         break;
+      default:
+         break;
+      }
 
-         /* fallthrough */
+      switch (pCreateInfo->pBinding[i].descriptorType) {
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         is_dynamic = true;
+         break;
+      default:
+         is_dynamic = false;
+         break;
+      }
 
+      switch (pCreateInfo->pBinding[i].descriptorType) {
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
@@ -1523,12 +1820,21 @@ VkResult VKAPI vkCreateDescriptorSetLayout(
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
          for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
             for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
-               *(surface[s])++ = descriptor + j;
+               surface[s]->index = descriptor + j;
+               if (is_dynamic)
+                  surface[s]->dynamic_slot = dynamic_slot + j;
+               else
+                  surface[s]->dynamic_slot = -1;
+               surface[s]++;
             }
          break;
       default:
-         unreachable("");
+         break;
       }
+
+      if (is_dynamic)
+         dynamic_slot += pCreateInfo->pBinding[i].count;
+
       descriptor += pCreateInfo->pBinding[i].count;
    }
 
@@ -1537,38 +1843,40 @@ VkResult VKAPI vkCreateDescriptorSetLayout(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkBeginDescriptorPoolUpdate(
+VkResult anv_BeginDescriptorPoolUpdate(
     VkDevice                                    device,
     VkDescriptorUpdateMode                      updateMode)
 {
-   stub_return(VK_UNSUPPORTED);
+   return VK_SUCCESS;
 }
 
-VkResult VKAPI vkEndDescriptorPoolUpdate(
+VkResult anv_EndDescriptorPoolUpdate(
     VkDevice                                    device,
     VkCmdBuffer                                 cmd)
 {
-   stub_return(VK_UNSUPPORTED);
+   return VK_SUCCESS;
 }
 
-VkResult VKAPI vkCreateDescriptorPool(
+VkResult anv_CreateDescriptorPool(
     VkDevice                                    device,
     VkDescriptorPoolUsage                       poolUsage,
     uint32_t                                    maxSets,
     const VkDescriptorPoolCreateInfo*           pCreateInfo,
     VkDescriptorPool*                           pDescriptorPool)
 {
-   stub_return(VK_UNSUPPORTED);
+   *pDescriptorPool = 1;
+
+   return VK_SUCCESS;
 }
 
-VkResult VKAPI vkResetDescriptorPool(
+VkResult anv_ResetDescriptorPool(
     VkDevice                                    device,
     VkDescriptorPool                            descriptorPool)
 {
-   stub_return(VK_UNSUPPORTED);
+   return VK_SUCCESS;
 }
 
-VkResult VKAPI vkAllocDescriptorSets(
+VkResult anv_AllocDescriptorSets(
     VkDevice                                    _device,
     VkDescriptorPool                            descriptorPool,
     VkDescriptorSetUsage                        setUsage,
@@ -1592,24 +1900,28 @@ VkResult VKAPI vkAllocDescriptorSets(
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
+      /* Descriptor sets may not be 100% filled out so we need to memset to
+       * ensure that we can properly detect and handle holes.
+       */
+      memset(set, 0, size);
+
       pDescriptorSets[i] = (VkDescriptorSet) set;
    }
 
    *pCount = count;
 
-   return VK_UNSUPPORTED;
+   return VK_SUCCESS;
 }
 
-void VKAPI vkClearDescriptorSets(
+void anv_ClearDescriptorSets(
     VkDevice                                    device,
     VkDescriptorPool                            descriptorPool,
     uint32_t                                    count,
     const VkDescriptorSet*                      pDescriptorSets)
 {
-   stub();
 }
 
-void VKAPI vkUpdateDescriptors(
+void anv_UpdateDescriptors(
     VkDevice                                    _device,
     VkDescriptorSet                             descriptorSet,
     uint32_t                                    updateCount,
@@ -1692,7 +2004,23 @@ clamp_int64(int64_t x, int64_t min, int64_t max)
       return max;
 }
 
-VkResult VKAPI vkCreateDynamicViewportState(
+static void
+anv_dynamic_vp_state_destroy(struct anv_device *device,
+                             struct anv_object *object,
+                             VkObjectType obj_type)
+{
+   struct anv_dynamic_vp_state *state = (void *)object;
+
+   assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE);
+
+   anv_state_pool_free(&device->dynamic_state_pool, state->sf_clip_vp);
+   anv_state_pool_free(&device->dynamic_state_pool, state->cc_vp);
+   anv_state_pool_free(&device->dynamic_state_pool, state->scissor);
+
+   anv_device_free(device, state);
+}
+
+VkResult anv_CreateDynamicViewportState(
     VkDevice                                    _device,
     const VkDynamicVpStateCreateInfo*           pCreateInfo,
     VkDynamicVpState*                           pState)
@@ -1707,6 +2035,8 @@ VkResult VKAPI vkCreateDynamicViewportState(
    if (state == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   state->base.destructor = anv_dynamic_vp_state_destroy;
+
    unsigned count = pCreateInfo->viewportAndScissorCount;
    state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
                                             count * 64, 64);
@@ -1777,7 +2107,7 @@ VkResult VKAPI vkCreateDynamicViewportState(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkCreateDynamicRasterState(
+VkResult anv_CreateDynamicRasterState(
     VkDevice                                    _device,
     const VkDynamicRsStateCreateInfo*           pCreateInfo,
     VkDynamicRsState*                           pState)
@@ -1793,9 +2123,6 @@ VkResult VKAPI vkCreateDynamicRasterState(
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
    /* Missing these:
-    * float                                       depthBias;
-    * float                                       depthBiasClamp;
-    * float                                       slopeScaledDepthBias;
     * float                                       pointFadeThreshold;
     *                            // optional (GL45) - Size of point fade threshold
     */
@@ -1808,12 +2135,25 @@ VkResult VKAPI vkCreateDynamicRasterState(
 
    GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf);
 
+   bool enable_bias = pCreateInfo->depthBias != 0.0f ||
+      pCreateInfo->slopeScaledDepthBias != 0.0f;
+   struct GEN8_3DSTATE_RASTER raster = {
+      .GlobalDepthOffsetEnableSolid = enable_bias,
+      .GlobalDepthOffsetEnableWireframe = enable_bias,
+      .GlobalDepthOffsetEnablePoint = enable_bias,
+      .GlobalDepthOffsetConstant = pCreateInfo->depthBias,
+      .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias,
+      .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp
+   };
+
+   GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster);
+
    *pState = (VkDynamicRsState) state;
 
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkCreateDynamicColorBlendState(
+VkResult anv_CreateDynamicColorBlendState(
     VkDevice                                    _device,
     const VkDynamicCbStateCreateInfo*           pCreateInfo,
     VkDynamicCbState*                           pState)
@@ -1828,22 +2168,141 @@ VkResult VKAPI vkCreateDynamicColorBlendState(
    if (state == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   struct GEN8_COLOR_CALC_STATE color_calc_state = {
+      .BlendConstantColorRed = pCreateInfo->blendConst[0],
+      .BlendConstantColorGreen = pCreateInfo->blendConst[1],
+      .BlendConstantColorBlue = pCreateInfo->blendConst[2],
+      .BlendConstantColorAlpha = pCreateInfo->blendConst[3]
+   };
+
+   GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
+
    *pState = (VkDynamicCbState) state;
 
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkCreateDynamicDepthStencilState(
-    VkDevice                                    device,
+VkResult anv_CreateDynamicDepthStencilState(
+    VkDevice                                    _device,
     const VkDynamicDsStateCreateInfo*           pCreateInfo,
     VkDynamicDsState*                           pState)
 {
-   stub_return(VK_UNSUPPORTED);
+   struct anv_device *device = (struct anv_device *) _device;
+   struct anv_dynamic_ds_state *state;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO);
+
+   state = anv_device_alloc(device, sizeof(*state), 8,
+                            VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+   if (state == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
+      GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
+
+      /* Is this what we need to do? */
+      .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
+
+      .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
+      .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
+
+      .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
+      .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
+   };
+
+   GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
+                                      &wm_depth_stencil);
+
+   struct GEN8_COLOR_CALC_STATE color_calc_state = {
+      .StencilReferenceValue = pCreateInfo->stencilFrontRef,
+      .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef
+   };
+
+   GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
+
+   *pState = (VkDynamicDsState) state;
+
+   return VK_SUCCESS;
 }
 
 // Command buffer functions
 
-VkResult VKAPI vkCreateCommandBuffer(
+static void
+anv_cmd_buffer_destroy(struct anv_device *device,
+                       struct anv_object *object,
+                       VkObjectType obj_type)
+{
+   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
+
+   assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER);
+
+   /* Destroy all of the batch buffers */
+   struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo;
+   while (bbo) {
+      struct anv_batch_bo *prev = bbo->prev_batch_bo;
+      anv_batch_bo_destroy(bbo, device);
+      bbo = prev;
+   }
+   anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
+
+   /* Destroy all of the surface state buffers */
+   bbo = cmd_buffer->surface_batch_bo;
+   while (bbo) {
+      struct anv_batch_bo *prev = bbo->prev_batch_bo;
+      anv_batch_bo_destroy(bbo, device);
+      bbo = prev;
+   }
+   anv_reloc_list_finish(&cmd_buffer->surface_relocs, device);
+
+   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
+   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
+   anv_device_free(device, cmd_buffer->exec2_objects);
+   anv_device_free(device, cmd_buffer->exec2_bos);
+   anv_device_free(device, cmd_buffer);
+}
+
+static VkResult
+anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
+{
+   struct anv_cmd_buffer *cmd_buffer = _data;
+
+   struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo;
+
+   VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* We set the end of the batch a little short so we would be sure we
+    * have room for the chaining command.  Since we're about to emit the
+    * chaining command, let's set it back where it should go.
+    */
+   batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
+   assert(batch->end == old_bbo->bo.map + old_bbo->bo.size);
+
+   anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
+      GEN8_MI_BATCH_BUFFER_START_header,
+      ._2ndLevelBatchBuffer = _1stlevelbatch,
+      .AddressSpaceIndicator = ASI_PPGTT,
+      .BatchBufferStartAddress = { &new_bbo->bo, 0 },
+   );
+
+   /* Pad out to a 2-dword aligned boundary with zeros */
+   if ((uintptr_t)batch->next % 8 != 0) {
+      *(uint32_t *)batch->next = 0;
+      batch->next += 4;
+   }
+
+   anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch);
+
+   new_bbo->prev_batch_bo = old_bbo;
+   cmd_buffer->last_batch_bo = new_bbo;
+
+   anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
+
+   return VK_SUCCESS;
+}
+
+VkResult anv_CreateCommandBuffer(
     VkDevice                                    _device,
     const VkCmdBufferCreateInfo*                pCreateInfo,
     VkCmdBuffer*                                pCmdBuffer)
@@ -1857,31 +2316,45 @@ VkResult VKAPI vkCreateCommandBuffer(
    if (cmd_buffer == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   cmd_buffer->base.destructor = anv_cmd_buffer_destroy;
+
    cmd_buffer->device = device;
    cmd_buffer->rs_state = NULL;
    cmd_buffer->vp_state = NULL;
-   memset(&cmd_buffer->default_bindings, 0, sizeof(cmd_buffer->default_bindings));
-   cmd_buffer->bindings = &cmd_buffer->default_bindings;
+   cmd_buffer->cb_state = NULL;
+   cmd_buffer->ds_state = NULL;
+   memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors));
 
-   result = anv_batch_init(&cmd_buffer->batch, device);
+   result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo);
    if (result != VK_SUCCESS)
       goto fail;
 
-   cmd_buffer->exec2_objects =
-      anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8,
-                       VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
-   if (cmd_buffer->exec2_objects == NULL) {
-      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-      goto fail_batch;
-   }
+   result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device);
+   if (result != VK_SUCCESS)
+      goto fail_batch_bo;
 
-   cmd_buffer->exec2_bos =
-      anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_bos[0]), 8,
-                       VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
-   if (cmd_buffer->exec2_bos == NULL) {
-      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-      goto fail_exec2_objects;
-   }
+   cmd_buffer->batch.device = device;
+   cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
+   cmd_buffer->batch.user_data = cmd_buffer;
+
+   anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
+                      GEN8_MI_BATCH_BUFFER_START_length * 4);
+
+   result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo);
+   if (result != VK_SUCCESS)
+      goto fail_batch_relocs;
+   cmd_buffer->surface_batch_bo->first_reloc = 0;
+
+   result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device);
+   if (result != VK_SUCCESS)
+      goto fail_ss_batch_bo;
+
+   /* Start surface_next at 1 so surface offset 0 is invalid. */
+   cmd_buffer->surface_next = 1;
+
+   cmd_buffer->exec2_objects = NULL;
+   cmd_buffer->exec2_bos = NULL;
+   cmd_buffer->exec2_array_length = 0;
 
    anv_state_stream_init(&cmd_buffer->surface_state_stream,
                          &device->surface_state_block_pool);
@@ -1890,105 +2363,127 @@ VkResult VKAPI vkCreateCommandBuffer(
 
    cmd_buffer->dirty = 0;
    cmd_buffer->vb_dirty = 0;
+   cmd_buffer->descriptors_dirty = 0;
+   cmd_buffer->pipeline = NULL;
+   cmd_buffer->vp_state = NULL;
+   cmd_buffer->rs_state = NULL;
+   cmd_buffer->ds_state = NULL;
 
    *pCmdBuffer = (VkCmdBuffer) cmd_buffer;
 
    return VK_SUCCESS;
 
- fail_exec2_objects:
-   anv_device_free(device, cmd_buffer->exec2_objects);
- fail_batch:
-   anv_batch_finish(&cmd_buffer->batch, device);
+ fail_ss_batch_bo:
+   anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device);
+ fail_batch_relocs:
+   anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
+ fail_batch_bo:
+   anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device);
  fail:
    anv_device_free(device, cmd_buffer);
 
    return result;
 }
 
-VkResult VKAPI vkBeginCommandBuffer(
-    VkCmdBuffer                                 cmdBuffer,
-    const VkCmdBufferBeginInfo*                 pBeginInfo)
+static void
+anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
 {
-   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
    struct anv_device *device = cmd_buffer->device;
+   struct anv_bo *scratch_bo = NULL;
 
-   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
-                  .PipelineSelection = _3D);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_SIP);
+   cmd_buffer->scratch_size = device->scratch_block_pool.size;
+   if (cmd_buffer->scratch_size > 0)
+      scratch_bo = &device->scratch_block_pool.bo;
 
    anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
-                  .GeneralStateBaseAddress = { NULL, 0 },
+                  .GeneralStateBaseAddress = { scratch_bo, 0 },
+                  .GeneralStateMemoryObjectControlState = GEN8_MOCS,
                   .GeneralStateBaseAddressModifyEnable = true,
                   .GeneralStateBufferSize = 0xfffff,
                   .GeneralStateBufferSizeModifyEnable = true,
 
-                  .SurfaceStateBaseAddress = { &device->surface_state_block_pool.bo, 0 },
-                  .SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */
+                  .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 },
+                  .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
                   .SurfaceStateBaseAddressModifyEnable = true,
 
                   .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
+                  .DynamicStateMemoryObjectControlState = GEN8_MOCS,
                   .DynamicStateBaseAddressModifyEnable = true,
                   .DynamicStateBufferSize = 0xfffff,
                   .DynamicStateBufferSizeModifyEnable = true,
 
                   .IndirectObjectBaseAddress = { NULL, 0 },
+                  .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
                   .IndirectObjectBaseAddressModifyEnable = true,
                   .IndirectObjectBufferSize = 0xfffff,
                   .IndirectObjectBufferSizeModifyEnable = true,
-                  
+
                   .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
+                  .InstructionMemoryObjectControlState = GEN8_MOCS,
                   .InstructionBaseAddressModifyEnable = true,
                   .InstructionBufferSize = 0xfffff,
                   .InstructionBuffersizeModifyEnable = true);
+}
 
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VF_STATISTICS,
-                   .StatisticsEnable = true);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HS, .Enable = false);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_TE, .TEEnable = false);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DS, .FunctionEnable = false);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS,
-                  .ConstantBufferOffset = 0,
-                  .ConstantBufferSize = 4);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS,
-                  .ConstantBufferOffset = 4,
-                  .ConstantBufferSize = 4);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS,
-                  .ConstantBufferOffset = 8,
-                  .ConstantBufferSize = 4);
-
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_CHROMAKEY,
-                  .ChromaKeyKillEnable = false);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SBE_SWIZ);
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS);
-
-   /* Hardcoded state: */
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
-                  .SurfaceType = SURFTYPE_2D,
-                  .Width = 1,
-                  .Height = 1,
-                  .SurfaceFormat = D16_UNORM,
-                  .SurfaceBaseAddress = { NULL, 0 },
-                  .HierarchicalDepthBufferEnable = 0);
-   
-   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_DEPTH_STENCIL,
-                  .DepthTestEnable = false,
-                  .DepthBufferWriteEnable = false);
+VkResult anv_BeginCommandBuffer(
+    VkCmdBuffer                                 cmdBuffer,
+    const VkCmdBufferBeginInfo*                 pBeginInfo)
+{
+   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+
+   anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+   cmd_buffer->current_pipeline = UINT32_MAX;
 
    return VK_SUCCESS;
 }
 
-static void
+static VkResult
 anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
-                      struct anv_bo *bo, struct anv_reloc_list *list)
+                      struct anv_bo *bo,
+                      struct drm_i915_gem_relocation_entry *relocs,
+                      size_t num_relocs)
 {
    struct drm_i915_gem_exec_object2 *obj;
 
-   bo->index = cmd_buffer->bo_count;
+   if (bo->index < cmd_buffer->bo_count &&
+       cmd_buffer->exec2_bos[bo->index] == bo)
+      return VK_SUCCESS;
+
+   if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) {
+      uint32_t new_len = cmd_buffer->exec2_objects ?
+                         cmd_buffer->exec2_array_length * 2 : 64;
+
+      struct drm_i915_gem_exec_object2 *new_objects =
+         anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
+                          8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+      if (new_objects == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      struct anv_bo **new_bos =
+         anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
+                          8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+      if (new_bos == NULL) {
+         anv_device_free(cmd_buffer->device, new_objects);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+
+      if (cmd_buffer->exec2_objects) {
+         memcpy(new_objects, cmd_buffer->exec2_objects,
+                cmd_buffer->bo_count * sizeof(*new_objects));
+         memcpy(new_bos, cmd_buffer->exec2_bos,
+                cmd_buffer->bo_count * sizeof(*new_bos));
+      }
+
+      cmd_buffer->exec2_objects = new_objects;
+      cmd_buffer->exec2_bos = new_bos;
+      cmd_buffer->exec2_array_length = new_len;
+   }
+
+   assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length);
+
+   bo->index = cmd_buffer->bo_count++;
    obj = &cmd_buffer->exec2_objects[bo->index];
    cmd_buffer->exec2_bos[bo->index] = bo;
-   cmd_buffer->bo_count++;
 
    obj->handle = bo->gem_handle;
    obj->relocation_count = 0;
@@ -1999,32 +2494,21 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
    obj->rsvd1 = 0;
    obj->rsvd2 = 0;
 
-   if (list) {
-      obj->relocation_count = list->num_relocs;
-      obj->relocs_ptr = (uintptr_t) list->relocs;
+   if (relocs) {
+      obj->relocation_count = num_relocs;
+      obj->relocs_ptr = (uintptr_t) relocs;
    }
+
+   return VK_SUCCESS;
 }
 
 static void
 anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer,
                                 struct anv_reloc_list *list)
 {
-   struct anv_bo *bo, *batch_bo;
-
-   batch_bo = &cmd_buffer->batch.bo;
-   for (size_t i = 0; i < list->num_relocs; i++) {
-      bo = list->reloc_bos[i];
-      /* Skip any relocations targeting the batch bo. We need to make sure
-       * it's the last in the list so we'll add it manually later.
-       */
-      if (bo == batch_bo)
-         continue;
-      if (bo->index < cmd_buffer->bo_count && cmd_buffer->exec2_bos[bo->index] == bo)
-         continue;
-
-      anv_cmd_buffer_add_bo(cmd_buffer, bo, NULL);
-   }
-}
+   for (size_t i = 0; i < list->num_relocs; i++)
+      anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0);
+}
 
 static void
 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
@@ -2049,7 +2533,7 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
-VkResult VKAPI vkEndCommandBuffer(
+VkResult anv_EndCommandBuffer(
     VkCmdBuffer                                 cmdBuffer)
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
@@ -2059,29 +2543,57 @@ VkResult VKAPI vkEndCommandBuffer(
    anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END);
 
    /* Round batch up to an even number of dwords. */
-   if ((batch->next - batch->bo.map) & 4)
+   if ((batch->next - batch->start) & 4)
       anv_batch_emit(batch, GEN8_MI_NOOP);
 
+   anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch);
+   cmd_buffer->surface_batch_bo->num_relocs =
+      cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc;
+   cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next;
+
    cmd_buffer->bo_count = 0;
    cmd_buffer->need_reloc = false;
 
    /* Lock for access to bo->index. */
    pthread_mutex_lock(&device->mutex);
 
-   /* Add block pool bos first so we can add them with their relocs. */
-   anv_cmd_buffer_add_bo(cmd_buffer, &device->surface_state_block_pool.bo,
-                         &batch->surf_relocs);
+   /* Add surface state bos first so we can add them with their relocs. */
+   for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo;
+        bbo != NULL; bbo = bbo->prev_batch_bo) {
+      anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
+                            &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
+                            bbo->num_relocs);
+   }
+
+   /* Add all of the BOs referenced by surface state */
+   anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
+
+   /* Add all but the first batch BO */
+   struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo;
+   while (batch_bo->prev_batch_bo) {
+      anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
+                            &batch->relocs.relocs[batch_bo->first_reloc],
+                            batch_bo->num_relocs);
+      batch_bo = batch_bo->prev_batch_bo;
+   }
+
+   /* Add everything referenced by the batches */
+   anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs);
+
+   /* Add the first batch bo last */
+   assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0);
+   anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
+                         &batch->relocs.relocs[batch_bo->first_reloc],
+                         batch_bo->num_relocs);
+   assert(batch_bo->bo.index == cmd_buffer->bo_count - 1);
 
-   anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->surf_relocs);
-   anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs);
-   anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs);
-   anv_cmd_buffer_process_relocs(cmd_buffer, &batch->surf_relocs);
-   anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs);
+   anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
+   anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs);
 
    cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
    cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count;
    cmd_buffer->execbuf.batch_start_offset = 0;
-   cmd_buffer->execbuf.batch_len = batch->next - batch->bo.map;
+   cmd_buffer->execbuf.batch_len = batch->next - batch->start;
    cmd_buffer->execbuf.cliprects_ptr = 0;
    cmd_buffer->execbuf.num_cliprects = 0;
    cmd_buffer->execbuf.DR1 = 0;
@@ -2099,63 +2611,154 @@ VkResult VKAPI vkEndCommandBuffer(
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkResetCommandBuffer(
+VkResult anv_ResetCommandBuffer(
     VkCmdBuffer                                 cmdBuffer)
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
 
-   anv_batch_reset(&cmd_buffer->batch);
+   /* Delete all but the first batch bo */
+   while (cmd_buffer->last_batch_bo->prev_batch_bo) {
+      struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo;
+      anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device);
+      cmd_buffer->last_batch_bo = prev;
+   }
+   assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL);
+
+   cmd_buffer->batch.relocs.num_relocs = 0;
+   anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
+                      GEN8_MI_BATCH_BUFFER_START_length * 4);
+
+   /* Delete all but the first batch bo */
+   while (cmd_buffer->surface_batch_bo->prev_batch_bo) {
+      struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo;
+      anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device);
+      cmd_buffer->surface_batch_bo = prev;
+   }
+   assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL);
+
+   cmd_buffer->surface_next = 1;
+   cmd_buffer->surface_relocs.num_relocs = 0;
+
+   cmd_buffer->rs_state = NULL;
+   cmd_buffer->vp_state = NULL;
+   cmd_buffer->cb_state = NULL;
+   cmd_buffer->ds_state = NULL;
 
    return VK_SUCCESS;
 }
 
 // Command buffer building functions
 
-void VKAPI vkCmdBindPipeline(
+void anv_CmdBindPipeline(
     VkCmdBuffer                                 cmdBuffer,
     VkPipelineBindPoint                         pipelineBindPoint,
     VkPipeline                                  _pipeline)
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+   struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline;
 
-   cmd_buffer->pipeline = (struct anv_pipeline *) _pipeline;
-   cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_COMPUTE:
+      cmd_buffer->compute_pipeline = pipeline;
+      cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+      break;
+
+   case VK_PIPELINE_BIND_POINT_GRAPHICS:
+      cmd_buffer->pipeline = pipeline;
+      cmd_buffer->vb_dirty |= pipeline->vb_used;
+      cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+      break;
+
+   default:
+      assert(!"invalid bind point");
+      break;
+   }
 }
 
-void VKAPI vkCmdBindDynamicStateObject(
+void anv_CmdBindDynamicStateObject(
     VkCmdBuffer                                 cmdBuffer,
     VkStateBindPoint                            stateBindPoint,
     VkDynamicStateObject                        dynamicState)
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
-   struct anv_dynamic_vp_state *vp_state;
 
    switch (stateBindPoint) {
    case VK_STATE_BIND_POINT_VIEWPORT:
-      vp_state = (struct anv_dynamic_vp_state *) dynamicState;
-      /* We emit state immediately, but set cmd_buffer->vp_state to indicate
-       * that vp state has been set in this command buffer. */
-      cmd_buffer->vp_state = vp_state;
-      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
-                     .ScissorRectPointer = vp_state->scissor.offset);
-      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
-                     .CCViewportPointer = vp_state->cc_vp.offset);
-      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
-                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
+      cmd_buffer->vp_state = (struct anv_dynamic_vp_state *) dynamicState;
+      cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY;
       break;
    case VK_STATE_BIND_POINT_RASTER:
       cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) dynamicState;
       cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY;
       break;
    case VK_STATE_BIND_POINT_COLOR_BLEND:
+      cmd_buffer->cb_state = (struct anv_dynamic_cb_state *) dynamicState;
+      cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY;
+      break;
    case VK_STATE_BIND_POINT_DEPTH_STENCIL:
+      cmd_buffer->ds_state = (struct anv_dynamic_ds_state *) dynamicState;
+      cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY;
       break;
    default:
       break;
    };
 }
 
-void VKAPI vkCmdBindDescriptorSets(
+static struct anv_state
+anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
+                                   uint32_t size, uint32_t alignment)
+{
+   struct anv_state state;
+
+   state.offset = align_u32(cmd_buffer->surface_next, alignment);
+   if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size)
+      return (struct anv_state) { 0 };
+
+   state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset;
+   state.alloc_size = size;
+   cmd_buffer->surface_next = state.offset + size;
+
+   assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size);
+
+   return state;
+}
+
+static VkResult
+anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo;
+
+   /* Finish off the old buffer */
+   old_bbo->num_relocs =
+      cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc;
+   old_bbo->length = cmd_buffer->surface_next;
+
+   VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs;
+   cmd_buffer->surface_next = 1;
+
+   new_bbo->prev_batch_bo = old_bbo;
+   cmd_buffer->surface_batch_bo = new_bbo;
+
+   /* Re-emit state base addresses so we get the new surface state base
+    * address before we start emitting binding tables etc.
+    */
+   anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+
+   /* It seems like just changing the state base addresses isn't enough.
+    * Invalidating the cache seems to be enough to cause things to
+    * propagate.  However, I'm not 100% sure what we're supposed to do.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
+                  .TextureCacheInvalidationEnable = true);
+
+   return VK_SUCCESS;
+}
+
+void anv_CmdBindDescriptorSets(
     VkCmdBuffer                                 cmdBuffer,
     VkPipelineBindPoint                         pipelineBindPoint,
     uint32_t                                    firstSet,
@@ -2165,47 +2768,37 @@ void VKAPI vkCmdBindDescriptorSets(
     const uint32_t*                             pDynamicOffsets)
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
-   struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout;
-   struct anv_bindings *bindings = cmd_buffer->bindings;
+   struct anv_pipeline_layout *layout;
+   struct anv_descriptor_set *set;
+   struct anv_descriptor_set_layout *set_layout;
+
+   assert(firstSet + setCount <= MAX_SETS);
+
+   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
+      layout = cmd_buffer->pipeline->layout;
+   else
+      layout = cmd_buffer->compute_pipeline->layout;
 
-   uint32_t offset = 0;
+   uint32_t dynamic_slot = 0;
    for (uint32_t i = 0; i < setCount; i++) {
-      struct anv_descriptor_set *set =
-         (struct anv_descriptor_set *) pDescriptorSets[i];
-      struct anv_descriptor_set_layout *set_layout = layout->set[firstSet + i].layout;
-
-      for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
-         uint32_t *surface_to_desc = set_layout->stage[s].surface_start;
-         uint32_t *sampler_to_desc = set_layout->stage[s].sampler_start;
-         uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0;
-         uint32_t start;
-
-         start = bias + layout->set[firstSet + i].surface_start[s];
-         for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) {
-            struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view;
-
-            bindings->descriptors[s].surfaces[start + b] =
-               view->surface_state.offset;
-            bindings->descriptors[s].relocs[start + b].bo = view->bo;
-            bindings->descriptors[s].relocs[start + b].offset = view->offset;
-         }
+      set = (struct anv_descriptor_set *) pDescriptorSets[i];
+      set_layout = layout->set[firstSet + i].layout;
 
-         start = layout->set[firstSet + i].sampler_start[s];
-         for (uint32_t b = 0; b < set_layout->stage[s].sampler_count; b++) {
-            struct anv_sampler *sampler = set->descriptors[sampler_to_desc[b]].sampler;
+      cmd_buffer->descriptors[firstSet + i].set = set;
 
-            memcpy(&bindings->descriptors[s].samplers[start + b],
-                   sampler->state, sizeof(sampler->state));
-         }
-      }
+      assert(set_layout->num_dynamic_buffers <
+             ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets));
+      memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets,
+             pDynamicOffsets + dynamic_slot,
+             set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
 
-      offset += layout->set[firstSet + i].layout->num_dynamic_buffers;
-   }
+      cmd_buffer->descriptors_dirty |= set_layout->shader_stages;
 
-   cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY;
+      dynamic_slot += set_layout->num_dynamic_buffers;
+   }
 }
 
-void VKAPI vkCmdBindIndexBuffer(
+void anv_CmdBindIndexBuffer(
     VkCmdBuffer                                 cmdBuffer,
     VkBuffer                                    _buffer,
     VkDeviceSize                                offset,
@@ -2215,19 +2808,19 @@ void VKAPI vkCmdBindIndexBuffer(
    struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
 
    static const uint32_t vk_to_gen_index_type[] = {
-      [VK_INDEX_TYPE_UINT8] = INDEX_BYTE,
-      [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
-      [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
+      [VK_INDEX_TYPE_UINT8]                     = INDEX_BYTE,
+      [VK_INDEX_TYPE_UINT16]                    = INDEX_WORD,
+      [VK_INDEX_TYPE_UINT32]                    = INDEX_DWORD,
    };
 
    anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
                   .IndexFormat = vk_to_gen_index_type[indexType],
-                  .MemoryObjectControlState = 0,
+                  .MemoryObjectControlState = GEN8_MOCS,
                   .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                   .BufferSize = buffer->size - offset);
 }
 
-void VKAPI vkCmdBindVertexBuffers(
+void anv_CmdBindVertexBuffers(
     VkCmdBuffer                                 cmdBuffer,
     uint32_t                                    startBinding,
     uint32_t                                    bindingCount,
@@ -2235,121 +2828,387 @@ void VKAPI vkCmdBindVertexBuffers(
     const VkDeviceSize*                         pOffsets)
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
-   struct anv_bindings *bindings = cmd_buffer->bindings;
+   struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings;
 
    /* We have to defer setting up vertex buffer since we need the buffer
     * stride from the pipeline. */
 
+   assert(startBinding + bindingCount < MAX_VBS);
    for (uint32_t i = 0; i < bindingCount; i++) {
-      bindings->vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i];
-      bindings->vb[startBinding + i].offset = pOffsets[i];
+      vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i];
+      vb[startBinding + i].offset = pOffsets[i];
       cmd_buffer->vb_dirty |= 1 << (startBinding + i);
    }
 }
 
+/* Build the binding table for one shader stage.
+ *
+ * Allocates a binding table from the command buffer's surface-state stream
+ * and fills it with SURFACE_STATE offsets: for the fragment stage the first
+ * MAX_RTS entries (the "bias") are the framebuffer's color attachments,
+ * followed by the stage's descriptor-set surfaces.  Each surface state is
+ * copied into the stream and its address dwords are patched via the
+ * surface reloc list.  Returns VK_ERROR_OUT_OF_DEVICE_MEMORY when any
+ * surface-state allocation fails (caller is expected to grow the surface
+ * bo and retry); VK_SUCCESS otherwise. */
+static VkResult
+cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
+                              unsigned stage, struct anv_state *bt_state)
+{
+   struct anv_pipeline_layout *layout;
+   uint32_t color_attachments, bias, size;
+
+   /* Compute uses its own bound pipeline; all other stages share the
+    * graphics pipeline's layout. */
+   if (stage == VK_SHADER_STAGE_COMPUTE)
+      layout = cmd_buffer->compute_pipeline->layout;
+   else
+      layout = cmd_buffer->pipeline->layout;
+
+   if (stage == VK_SHADER_STAGE_FRAGMENT) {
+      bias = MAX_RTS;
+      color_attachments = cmd_buffer->framebuffer->color_attachment_count;
+   } else {
+      bias = 0;
+      color_attachments = 0;
+   }
+
+   /* This is a little awkward: layout can be NULL but we still have to
+    * allocate and set a binding table for the PS stage for render
+    * targets. */
+   uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
+
+   if (color_attachments + surface_count == 0)
+      return VK_SUCCESS;
+
+   size = (bias + surface_count) * sizeof(uint32_t);
+   *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
+   uint32_t *bt_map = bt_state->map;
+
+   if (bt_state->map == NULL)
+      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+   /* Render targets first: entries [0, color_attachments). */
+   for (uint32_t ca = 0; ca < color_attachments; ca++) {
+      const struct anv_surface_view *view =
+         cmd_buffer->framebuffer->color_attachments[ca];
+
+      struct anv_state state =
+         anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
+
+      if (state.map == NULL)
+         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+      memcpy(state.map, view->surface_state.map, 64);
+
+      /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
+      *(uint64_t *)(state.map + 8 * 4) =
+         anv_reloc_list_add(&cmd_buffer->surface_relocs,
+                            cmd_buffer->device,
+                            state.offset + 8 * 4,
+                            view->bo, view->offset);
+
+      bt_map[ca] = state.offset;
+   }
+
+   if (layout == NULL)
+      return VK_SUCCESS;
+
+   /* Then descriptor-set surfaces, offset by `bias` in the table. */
+   for (uint32_t set = 0; set < layout->num_sets; set++) {
+      struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
+      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+      struct anv_descriptor_slot *surface_slots =
+         set_layout->stage[stage].surface_start;
+
+      uint32_t start = bias + layout->set[set].surface_start[stage];
+
+      for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
+         struct anv_surface_view *view =
+            d->set->descriptors[surface_slots[b].index].view;
+
+         /* Unwritten descriptor slots simply leave their entry stale. */
+         if (!view)
+            continue;
+
+         struct anv_state state =
+            anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
+
+         if (state.map == NULL)
+            return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+         uint32_t offset;
+         if (surface_slots[b].dynamic_slot >= 0) {
+            /* Dynamic buffer: rebuild the surface state with the dynamic
+             * offset applied and the range shrunk accordingly. */
+            uint32_t dynamic_offset =
+               d->dynamic_offsets[surface_slots[b].dynamic_slot];
+
+            offset = view->offset + dynamic_offset;
+            fill_buffer_surface_state(state.map, view->format, offset,
+                                      view->range - dynamic_offset);
+         } else {
+            offset = view->offset;
+            memcpy(state.map, view->surface_state.map, 64);
+         }
+
+         /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
+         *(uint64_t *)(state.map + 8 * 4) =
+            anv_reloc_list_add(&cmd_buffer->surface_relocs,
+                               cmd_buffer->device,
+                               state.offset + 8 * 4,
+                               view->bo, offset);
+
+         bt_map[start + b] = state.offset;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Gather all SAMPLER_STATE structures for one shader stage into a single
+ * dynamic-state allocation (16 bytes per sampler) and return it via
+ * *state.  Returns VK_SUCCESS immediately when the stage binds no
+ * samplers; VK_ERROR_OUT_OF_DEVICE_MEMORY if the stream allocation
+ * fails. */
+static VkResult
+cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
+                         unsigned stage, struct anv_state *state)
+{
+   struct anv_pipeline_layout *layout;
+   uint32_t sampler_count;
+
+   /* Compute has its own pipeline; other stages use the graphics one. */
+   if (stage == VK_SHADER_STAGE_COMPUTE)
+      layout = cmd_buffer->compute_pipeline->layout;
+   else
+      layout = cmd_buffer->pipeline->layout;
+
+   sampler_count = layout ? layout->stage[stage].sampler_count : 0;
+   if (sampler_count == 0)
+      return VK_SUCCESS;
+
+   /* Each SAMPLER_STATE entry is 16 bytes. */
+   uint32_t size = sampler_count * 16;
+   *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
+
+   if (state->map == NULL)
+      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+   for (uint32_t set = 0; set < layout->num_sets; set++) {
+      struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
+      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+      struct anv_descriptor_slot *sampler_slots =
+         set_layout->stage[stage].sampler_start;
+
+      uint32_t start = layout->set[set].sampler_start[stage];
+
+      for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
+         struct anv_sampler *sampler =
+            d->set->descriptors[sampler_slots[b].index].sampler;
+
+         /* Unwritten descriptor slots leave their entry untouched. */
+         if (!sampler)
+            continue;
+
+         memcpy(state->map + (start + b) * 16,
+                sampler->state, sizeof(sampler->state));
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Emit the sampler table and binding table for one 3D shader stage and
+ * point the hardware at them.  The *_VS pointer commands are reused for
+ * every stage by overriding _3DCommandSubOpcode with the stage-specific
+ * opcode from the tables below.  Propagates allocation failures from the
+ * two emit helpers so the caller can grow the surface bo and retry. */
+static VkResult
+flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
+{
+   struct anv_state surfaces = { 0, }, samplers = { 0, };
+   VkResult result;
+
+   result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
+   if (result != VK_SUCCESS)
+      return result;
+   result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Per-stage sub-opcodes for 3DSTATE_SAMPLER_STATE_POINTERS_*. */
+   static const uint32_t sampler_state_opcodes[] = {
+      [VK_SHADER_STAGE_VERTEX]                  = 43,
+      [VK_SHADER_STAGE_TESS_CONTROL]            = 44, /* HS */
+      [VK_SHADER_STAGE_TESS_EVALUATION]         = 45, /* DS */
+      [VK_SHADER_STAGE_GEOMETRY]                = 46,
+      [VK_SHADER_STAGE_FRAGMENT]                = 47,
+      [VK_SHADER_STAGE_COMPUTE]                 = 0,
+   };
+
+   /* Per-stage sub-opcodes for 3DSTATE_BINDING_TABLE_POINTERS_*. */
+   static const uint32_t binding_table_opcodes[] = {
+      [VK_SHADER_STAGE_VERTEX]                  = 38,
+      [VK_SHADER_STAGE_TESS_CONTROL]            = 39,
+      [VK_SHADER_STAGE_TESS_EVALUATION]         = 40,
+      [VK_SHADER_STAGE_GEOMETRY]                = 41,
+      [VK_SHADER_STAGE_FRAGMENT]                = 42,
+      [VK_SHADER_STAGE_COMPUTE]                 = 0,
+   };
+
+   /* alloc_size == 0 means the stage had nothing to emit. */
+   if (samplers.alloc_size > 0) {
+      anv_batch_emit(&cmd_buffer->batch,
+                     GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
+                     ._3DCommandSubOpcode  = sampler_state_opcodes[stage],
+                     .PointertoVSSamplerState = samplers.offset);
+   }
+
+   if (surfaces.alloc_size > 0) {
+      anv_batch_emit(&cmd_buffer->batch,
+                     GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
+                     ._3DCommandSubOpcode  = binding_table_opcodes[stage],
+                     .PointertoVSBindingTable = surfaces.offset);
+   }
+
+   return VK_SUCCESS;
+}
+
 static void
 flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
 {
-   struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout;
-   struct anv_bindings *bindings = cmd_buffer->bindings;
-   uint32_t layers = cmd_buffer->framebuffer->layers;
+   uint32_t s, dirty = cmd_buffer->descriptors_dirty &
+                       cmd_buffer->pipeline->active_stages;
 
-   for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
-      uint32_t bias;
+   VkResult result;
+   for_each_bit(s, dirty) {
+      result = flush_descriptor_set(cmd_buffer, s);
+      if (result != VK_SUCCESS)
+         break;
+   }
 
-      if (s == VK_SHADER_STAGE_FRAGMENT) {
-         bias = MAX_RTS;
-         layers = cmd_buffer->framebuffer->layers;
-      } else {
-         bias = 0;
-         layers = 0;
-      }
+   if (result != VK_SUCCESS) {
+      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
 
-      /* This is a little awkward: layout can be NULL but we still have to
-       * allocate and set a binding table for the PS stage for render
-       * targets. */
-      uint32_t surface_count = layout ? layout->stage[s].surface_count : 0;
-
-      if (layers + surface_count > 0) {
-         struct anv_state state;
-         uint32_t size;
-
-         size = (bias + surface_count) * sizeof(uint32_t);
-         state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 32);
-         memcpy(state.map, bindings->descriptors[s].surfaces, size);
-
-         for (uint32_t i = 0; i < layers; i++)
-            anv_reloc_list_add(&cmd_buffer->batch.surf_relocs,
-                               bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t),
-                               bindings->descriptors[s].relocs[i].bo,
-                               bindings->descriptors[s].relocs[i].offset);
-
-         for (uint32_t i = 0; i < surface_count; i++)
-            anv_reloc_list_add(&cmd_buffer->batch.surf_relocs,
-                               bindings->descriptors[s].surfaces[bias + i] + 8 * sizeof(int32_t),
-                               bindings->descriptors[s].relocs[bias + i].bo,
-                               bindings->descriptors[s].relocs[bias + i].offset);
-
-         static const uint32_t binding_table_opcodes[] = {
-            [VK_SHADER_STAGE_VERTEX] = 38,
-            [VK_SHADER_STAGE_TESS_CONTROL] = 39,
-            [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
-            [VK_SHADER_STAGE_GEOMETRY] = 41,
-            [VK_SHADER_STAGE_FRAGMENT] = 42,
-            [VK_SHADER_STAGE_COMPUTE] = 0,
-         };
+      result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
+      assert(result == VK_SUCCESS);
 
-         anv_batch_emit(&cmd_buffer->batch,
-                        GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
-                        ._3DCommandSubOpcode  = binding_table_opcodes[s],
-                        .PointertoVSBindingTable = state.offset);
+      /* Re-emit all active binding tables */
+      for_each_bit(s, cmd_buffer->pipeline->active_stages) {
+         result = flush_descriptor_set(cmd_buffer, s);
+
+         /* It had better succeed this time */
+         assert(result == VK_SUCCESS);
       }
+   }
 
-      if (layout && layout->stage[s].sampler_count > 0) {
-         struct anv_state state;
-         size_t size;
-
-         size = layout->stage[s].sampler_count * 16;
-         state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
-         memcpy(state.map, bindings->descriptors[s].samplers, size);
-
-         static const uint32_t sampler_state_opcodes[] = {
-            [VK_SHADER_STAGE_VERTEX] = 43,
-            [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
-            [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
-            [VK_SHADER_STAGE_GEOMETRY] = 46,
-            [VK_SHADER_STAGE_FRAGMENT] = 47,
-            [VK_SHADER_STAGE_COMPUTE] = 0,
-         };
+   cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages;
+}
+
+/* Copy `dwords` dwords from `a` into a fresh dynamic-state allocation and
+ * return it.  NOTE(review): the stream allocation's map is not
+ * NULL-checked before the memcpy — presumably the dynamic-state stream
+ * cannot fail here; confirm. */
+static struct anv_state
+anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
+                             uint32_t *a, uint32_t dwords, uint32_t alignment)
+{
+   struct anv_state state;
+
+   state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
+                                  dwords * 4, alignment);
+   memcpy(state.map, a, dwords * 4);
 
-         anv_batch_emit(&cmd_buffer->batch,
-                        GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
-                        ._3DCommandSubOpcode  = sampler_state_opcodes[s],
-                        .PointertoVSSamplerState = state.offset);
+   /* Let valgrind verify we never hand undefined bytes to the GPU. */
+   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
+
+   return state;
+}
+
+/* Like anv_cmd_buffer_emit_dynamic, but the allocation is filled with the
+ * dword-wise OR of `a` and `b` — used to merge two partially-packed
+ * hardware state templates into one. */
+static struct anv_state
+anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
+                             uint32_t *a, uint32_t *b,
+                             uint32_t dwords, uint32_t alignment)
+{
+   struct anv_state state;
+   uint32_t *p;
+
+   state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
+                                  dwords * 4, alignment);
+   p = state.map;
+   for (uint32_t i = 0; i < dwords; i++)
+      p[i] = a[i] | b[i];
+
+   /* Let valgrind verify we never hand undefined bytes to the GPU. */
+   VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
+
+   return state;
+}
+
+/* Emit samplers and the binding table for the compute stage, then pack an
+ * INTERFACE_DESCRIPTOR_DATA pointing at them and load it with
+ * MEDIA_INTERFACE_DESCRIPTOR_LOAD.  Returns
+ * VK_ERROR_OUT_OF_DEVICE_MEMORY if either emit helper runs out of
+ * surface-state space, so the caller can grow the surface bo and retry. */
+static VkResult
+flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_device *device = cmd_buffer->device;
+   struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+   struct anv_state surfaces = { 0, }, samplers = { 0, };
+   VkResult result;
+
+   result = cmd_buffer_emit_samplers(cmd_buffer,
+                                     VK_SHADER_STAGE_COMPUTE, &samplers);
+   if (result != VK_SUCCESS)
+      return result;
+   result = cmd_buffer_emit_binding_table(cmd_buffer,
+                                          VK_SHADER_STAGE_COMPUTE, &surfaces);
+   if (result != VK_SUCCESS)
+      return result;
+
+   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
+      .KernelStartPointer = pipeline->cs_simd,
+      .KernelStartPointerHigh = 0,
+      .BindingTablePointer = surfaces.offset,
+      .BindingTableEntryCount = 0,
+      .SamplerStatePointer = samplers.offset,
+      .SamplerCount = 0,
+      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
+   };
+
+   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
+   /* NOTE(review): state.map is not NULL-checked before packing —
+    * presumably pool allocation cannot fail here; confirm. */
+   struct anv_state state =
+      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
+
+   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
+                  .InterfaceDescriptorTotalLength = size,
+                  .InterfaceDescriptorDataStartAddress = state.offset);
+
+   return VK_SUCCESS;
+}
+
+/* Flush all pending compute state before a dispatch: select the GPGPU
+ * pipeline if needed, replay the compute pipeline's batch when dirty,
+ * and (re-)emit the compute descriptor set, retrying once with a fresh
+ * surface-state bo on out-of-memory. */
+static void
+anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+   VkResult result;
+
+   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
+
+   if (cmd_buffer->current_pipeline != GPGPU) {
+      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
+                     .PipelineSelection = GPGPU);
+      cmd_buffer->current_pipeline = GPGPU;
+   }
+
+   if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
+      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+   if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
+       (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
+      result = flush_compute_descriptor_set(cmd_buffer);
+      if (result != VK_SUCCESS) {
+         result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
+         assert(result == VK_SUCCESS);
+         result = flush_compute_descriptor_set(cmd_buffer);
+         assert(result == VK_SUCCESS);
       }
+      /* descriptors_dirty is a VkShaderStageFlags bitmask, so clear the
+       * _BIT flag (as tested above), not the VK_SHADER_STAGE_COMPUTE enum
+       * value — the enum is 5 and would clear unrelated stage bits. */
+      cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
    }
+
+   cmd_buffer->compute_dirty = 0;
 }
 
 static void
 anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_pipeline *pipeline = cmd_buffer->pipeline;
-   struct anv_bindings *bindings = cmd_buffer->bindings;
-   const uint32_t num_buffers = __builtin_popcount(cmd_buffer->vb_dirty);
-   const uint32_t num_dwords = 1 + num_buffers * 4;
    uint32_t *p;
 
-   if (cmd_buffer->vb_dirty) {
+   uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used;
+
+   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
+
+   if (cmd_buffer->current_pipeline != _3D) {
+      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
+                     .PipelineSelection = _3D);
+      cmd_buffer->current_pipeline = _3D;
+   }
+
+   if (vb_emit) {
+      const uint32_t num_buffers = __builtin_popcount(vb_emit);
+      const uint32_t num_dwords = 1 + num_buffers * 4;
+
       p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                           GEN8_3DSTATE_VERTEX_BUFFERS);
       uint32_t vb, i = 0;
-      for_each_bit(vb, cmd_buffer->vb_dirty) {
-         struct anv_buffer *buffer = bindings->vb[vb].buffer;
-         uint32_t offset = bindings->vb[vb].offset;
-      
+      for_each_bit(vb, vb_emit) {
+         struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
+         uint32_t offset = cmd_buffer->vertex_bindings[vb].offset;
+
          struct GEN8_VERTEX_BUFFER_STATE state = {
             .VertexBufferIndex = vb,
-            .MemoryObjectControlState = 0,
+            .MemoryObjectControlState = GEN8_MOCS,
             .AddressModifyEnable = true,
             .BufferPitch = pipeline->binding_stride[vb],
             .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
@@ -2361,21 +3220,69 @@ anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
       }
    }
 
-   if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
+   if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
+      /* If somebody compiled a pipeline after starting a command buffer the
+       * scratch bo may have grown since we started this cmd buffer (and
+       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
+       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
+      if (cmd_buffer->scratch_size < pipeline->total_scratch)
+         anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+
       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+   }
 
-   if (cmd_buffer->dirty & ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY)
+   if (cmd_buffer->descriptors_dirty)
       flush_descriptor_sets(cmd_buffer);
 
-   if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY))
+   if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) {
+      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
+                     .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset);
+      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
+                     .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset);
+      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
+                     .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset);
+   }
+
+   if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) {
       anv_batch_emit_merge(&cmd_buffer->batch,
                            cmd_buffer->rs_state->state_sf, pipeline->state_sf);
+      anv_batch_emit_merge(&cmd_buffer->batch,
+                           cmd_buffer->rs_state->state_raster, pipeline->state_raster);
+   }
 
-   cmd_buffer->vb_dirty = 0;
+   if (cmd_buffer->ds_state &&
+       (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)))
+      anv_batch_emit_merge(&cmd_buffer->batch,
+                           cmd_buffer->ds_state->state_wm_depth_stencil,
+                           pipeline->state_wm_depth_stencil);
+
+   if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
+      struct anv_state state;
+      if (cmd_buffer->ds_state == NULL)
+         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
+                                             cmd_buffer->cb_state->state_color_calc,
+                                             GEN8_COLOR_CALC_STATE_length, 64);
+      else if (cmd_buffer->cb_state == NULL)
+         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
+                                             cmd_buffer->ds_state->state_color_calc,
+                                             GEN8_COLOR_CALC_STATE_length, 64);
+      else
+         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
+                                              cmd_buffer->ds_state->state_color_calc,
+                                              cmd_buffer->cb_state->state_color_calc,
+                                              GEN8_COLOR_CALC_STATE_length, 64);
+
+      anv_batch_emit(&cmd_buffer->batch,
+                     GEN8_3DSTATE_CC_STATE_POINTERS,
+                     .ColorCalcStatePointer = state.offset,
+                     .ColorCalcStatePointerValid = true);
+   }
+
+   cmd_buffer->vb_dirty &= ~vb_emit;
    cmd_buffer->dirty = 0;
 }
 
-void VKAPI vkCmdDraw(
+void anv_CmdDraw(
     VkCmdBuffer                                 cmdBuffer,
     uint32_t                                    firstVertex,
     uint32_t                                    vertexCount,
@@ -2395,7 +3302,7 @@ void VKAPI vkCmdDraw(
                   .BaseVertexLocation = 0);
 }
 
-void VKAPI vkCmdDrawIndexed(
+void anv_CmdDrawIndexed(
     VkCmdBuffer                                 cmdBuffer,
     uint32_t                                    firstIndex,
     uint32_t                                    indexCount,
@@ -2413,7 +3320,7 @@ void VKAPI vkCmdDrawIndexed(
                   .StartVertexLocation = firstIndex,
                   .InstanceCount = instanceCount,
                   .StartInstanceLocation = firstInstance,
-                  .BaseVertexLocation = 0);
+                  .BaseVertexLocation = vertexOffset);
 }
 
 static void
@@ -2441,7 +3348,7 @@ anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
 #define GEN7_3DPRIM_START_INSTANCE      0x243C
 #define GEN7_3DPRIM_BASE_VERTEX         0x2440
 
-void VKAPI vkCmdDrawIndirect(
+void anv_CmdDrawIndirect(
     VkCmdBuffer                                 cmdBuffer,
     VkBuffer                                    _buffer,
     VkDeviceSize                                offset,
@@ -2466,7 +3373,7 @@ void VKAPI vkCmdDrawIndirect(
                   .VertexAccessType = SEQUENTIAL);
 }
 
-void VKAPI vkCmdDrawIndexedIndirect(
+void anv_CmdDrawIndexedIndirect(
     VkCmdBuffer                                 cmdBuffer,
     VkBuffer                                    _buffer,
     VkDeviceSize                                offset,
@@ -2491,24 +3398,67 @@ void VKAPI vkCmdDrawIndexedIndirect(
                   .VertexAccessType = RANDOM);
 }
 
-void VKAPI vkCmdDispatch(
+/* Record a direct compute dispatch: flush pending compute state, then
+ * emit GPGPU_WALKER with the x/y/z thread-group counts followed by
+ * MEDIA_STATE_FLUSH. */
+void anv_CmdDispatch(
     VkCmdBuffer                                 cmdBuffer,
     uint32_t                                    x,
     uint32_t                                    y,
     uint32_t                                    z)
 {
-   stub();
+   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+   struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+
+   anv_cmd_buffer_flush_compute_state(cmd_buffer);
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
+                  .SIMDSize = prog_data->simd_size / 16,
+                  .ThreadDepthCounterMaximum = 0,
+                  .ThreadHeightCounterMaximum = 0,
+                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
+                  .ThreadGroupIDXDimension = x,
+                  .ThreadGroupIDYDimension = y,
+                  .ThreadGroupIDZDimension = z,
+                  .RightExecutionMask = pipeline->cs_right_mask,
+                  .BottomExecutionMask = 0xffffffff);
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
 }
 
-void VKAPI vkCmdDispatchIndirect(
+/* MMIO registers GPGPU_WALKER reads its indirect thread-group counts
+ * from. */
+#define GPGPU_DISPATCHDIMX 0x2500
+#define GPGPU_DISPATCHDIMY 0x2504
+#define GPGPU_DISPATCHDIMZ 0x2508
+
+/* Record an indirect compute dispatch: load the three consecutive
+ * uint32 group counts from the buffer at `offset` into the dispatch-dim
+ * registers via MI_LOAD_REGISTER_MEM, then emit GPGPU_WALKER with
+ * IndirectParameterEnable set. */
+void anv_CmdDispatchIndirect(
     VkCmdBuffer                                 cmdBuffer,
-    VkBuffer                                    buffer,
+    VkBuffer                                    _buffer,
     VkDeviceSize                                offset)
 {
-   stub();
+   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+   struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+   struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
+   struct anv_bo *bo = buffer->bo;
+   uint32_t bo_offset = buffer->offset + offset;
+
+   anv_cmd_buffer_flush_compute_state(cmd_buffer);
+
+   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
+   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
+   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
+                  .IndirectParameterEnable = true,
+                  .SIMDSize = prog_data->simd_size / 16,
+                  .ThreadDepthCounterMaximum = 0,
+                  .ThreadHeightCounterMaximum = 0,
+                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
+                  .RightExecutionMask = pipeline->cs_right_mask,
+                  .BottomExecutionMask = 0xffffffff);
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
 }
 
-void VKAPI vkCmdSetEvent(
+void anv_CmdSetEvent(
     VkCmdBuffer                                 cmdBuffer,
     VkEvent                                     event,
     VkPipeEvent                                 pipeEvent)
@@ -2516,7 +3466,7 @@ void VKAPI vkCmdSetEvent(
    stub();
 }
 
-void VKAPI vkCmdResetEvent(
+void anv_CmdResetEvent(
     VkCmdBuffer                                 cmdBuffer,
     VkEvent                                     event,
     VkPipeEvent                                 pipeEvent)
@@ -2524,7 +3474,7 @@ void VKAPI vkCmdResetEvent(
    stub();
 }
 
-void VKAPI vkCmdWaitEvents(
+void anv_CmdWaitEvents(
     VkCmdBuffer                                 cmdBuffer,
     VkWaitEvent                                 waitEvent,
     uint32_t                                    eventCount,
@@ -2535,7 +3485,7 @@ void VKAPI vkCmdWaitEvents(
    stub();
 }
 
-void VKAPI vkCmdPipelineBarrier(
+void anv_CmdPipelineBarrier(
     VkCmdBuffer                                 cmdBuffer,
     VkWaitEvent                                 waitEvent,
     uint32_t                                    pipeEventCount,
@@ -2543,117 +3493,122 @@ void VKAPI vkCmdPipelineBarrier(
     uint32_t                                    memBarrierCount,
     const void**                                ppMemBarriers)
 {
-   stub();
-}
-
-static void
-anv_batch_emit_ps_depth_count(struct anv_batch *batch,
-                              struct anv_bo *bo, uint32_t offset)
-{
-   anv_batch_emit(batch, GEN8_PIPE_CONTROL,
-                  .DestinationAddressType = DAT_PPGTT,
-                  .PostSyncOperation = WritePSDepthCount,
-                  .Address = { bo, offset });  /* FIXME: This is only lower 32 bits */
-}
-
-void VKAPI vkCmdBeginQuery(
-    VkCmdBuffer                                 cmdBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    slot,
-    VkQueryControlFlags                         flags)
-{
-   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
-   struct anv_query_pool *pool = (struct anv_query_pool *) queryPool;
+   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer;
+   uint32_t b, *dw;
 
-   switch (pool->type) {
-   case VK_QUERY_TYPE_OCCLUSION:
-      anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16);
-      break;
+   struct GEN8_PIPE_CONTROL cmd = {
+      GEN8_PIPE_CONTROL_header,
+      .PostSyncOperation = NoWrite,
+   };
 
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      break;
+   /* XXX: I think waitEvent is a no-op on our HW.  We should verify that. */
 
-   default:
-      break;
+   for (uint32_t i = 0; i < pipeEventCount; i++) {
+      switch (pPipeEvents[i]) {
+      case VK_PIPE_EVENT_TOP_OF_PIPE:
+         /* This is just what PIPE_CONTROL does */
+         break;
+      case VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE:
+      case VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE:
+      case VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE:
+         cmd.StallAtPixelScoreboard = true;
+         break;
+      case VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE:
+      case VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE:
+      case VK_PIPE_EVENT_TRANSFER_COMPLETE:
+      case VK_PIPE_EVENT_COMMANDS_COMPLETE:
+         cmd.CommandStreamerStallEnable = true;
+         break;
+      default:
+         unreachable("Invalid VkPipeEvent");
+      }
    }
-}
 
-void VKAPI vkCmdEndQuery(
-    VkCmdBuffer                                 cmdBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    slot)
-{
-   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
-   struct anv_query_pool *pool = (struct anv_query_pool *) queryPool;
-
-   switch (pool->type) {
-   case VK_QUERY_TYPE_OCCLUSION:
-      anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16 + 8);
-      break;
-
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      break;
+   /* XXX: Right now, we're really dumb and just flush whatever categories
+    * the app asks for.  One of these days we may make this a bit better
+    * but right now that's all the hardware allows for in most areas.
+    */
+   VkMemoryOutputFlags out_flags = 0;
+   VkMemoryInputFlags in_flags = 0;
 
-   default:
-      break;
+   for (uint32_t i = 0; i < memBarrierCount; i++) {
+      const struct anv_common *common = ppMemBarriers[i];
+      switch (common->sType) {
+      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
+         const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common;
+         out_flags |= barrier->outputMask;
+         in_flags |= barrier->inputMask;
+         break;
+      }
+      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
+         const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common;
+         out_flags |= barrier->outputMask;
+         in_flags |= barrier->inputMask;
+         break;
+      }
+      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
+         const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common;
+         out_flags |= barrier->outputMask;
+         in_flags |= barrier->inputMask;
+         break;
+      }
+      default:
+         unreachable("Invalid memory barrier type");
+      }
    }
-}
 
-void VKAPI vkCmdResetQueryPool(
-    VkCmdBuffer                                 cmdBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    startQuery,
-    uint32_t                                    queryCount)
-{
-   stub();
-}
-
-#define TIMESTAMP 0x44070
-
-void VKAPI vkCmdWriteTimestamp(
-    VkCmdBuffer                                 cmdBuffer,
-    VkTimestampType                             timestampType,
-    VkBuffer                                    destBuffer,
-    VkDeviceSize                                destOffset)
-{
-   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
-   struct anv_buffer *buffer = (struct anv_buffer *) destBuffer;
-   struct anv_bo *bo = buffer->bo;
-
-   switch (timestampType) {
-   case VK_TIMESTAMP_TYPE_TOP:
-      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
-                     .RegisterAddress = TIMESTAMP,
-                     .MemoryAddress = { bo, buffer->offset + destOffset });
-      break;
-
-   case VK_TIMESTAMP_TYPE_BOTTOM:
-      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
-                     .DestinationAddressType = DAT_PPGTT,
-                     .PostSyncOperation = WriteTimestamp,
-                     .Address = /* FIXME: This is only lower 32 bits */
-                        { bo, buffer->offset + destOffset });
-      break;
+   for_each_bit(b, out_flags) {
+      switch ((VkMemoryOutputFlags)(1 << b)) {
+      case VK_MEMORY_OUTPUT_CPU_WRITE_BIT:
+         break; /* FIXME: Little-core systems */
+      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
+         cmd.DCFlushEnable = true;
+         break;
+      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
+         cmd.RenderTargetCacheFlushEnable = true;
+         break;
+      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
+         cmd.DepthCacheFlushEnable = true;
+         break;
+      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
+         cmd.RenderTargetCacheFlushEnable = true;
+         cmd.DepthCacheFlushEnable = true;
+         break;
+      default:
+         unreachable("Invalid memory output flag");
+      }
+   }
 
-   default:
-      break;
+   /* Iterate the *input* flags here: the loop above already handled
+    * out_flags, and iterating out_flags again while switching on
+    * VkMemoryInputFlags (a copy-and-paste from the output loop) would
+    * misinterpret the bits and skip the requested cache invalidations. */
+   for_each_bit(b, in_flags) {
+      switch ((VkMemoryInputFlags)(1 << b)) {
+      case VK_MEMORY_INPUT_CPU_READ_BIT:
+         break; /* FIXME: Little-core systems */
+      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
+      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
+      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
+         cmd.VFCacheInvalidationEnable = true;
+         break;
+      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
+         cmd.ConstantCacheInvalidationEnable = true;
+         /* fallthrough */
+      case VK_MEMORY_INPUT_SHADER_READ_BIT:
+         cmd.DCFlushEnable = true;
+         cmd.TextureCacheInvalidationEnable = true;
+         break;
+      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
+      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
+         break; /* XXX: Hunh? */
+      case VK_MEMORY_INPUT_TRANSFER_BIT:
+         cmd.TextureCacheInvalidationEnable = true;
+         break;
+      }
+   }
-}
 
-void VKAPI vkCmdCopyQueryPoolResults(
-    VkCmdBuffer                                 cmdBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    startQuery,
-    uint32_t                                    queryCount,
-    VkBuffer                                    destBuffer,
-    VkDeviceSize                                destOffset,
-    VkDeviceSize                                destStride,
-    VkQueryResultFlags                          flags)
-{
-   stub();
+   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
+   GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
 }
 
-void VKAPI vkCmdInitAtomicCounters(
+void anv_CmdInitAtomicCounters(
     VkCmdBuffer                                 cmdBuffer,
     VkPipelineBindPoint                         pipelineBindPoint,
     uint32_t                                    startCounter,
@@ -2663,7 +3618,7 @@ void VKAPI vkCmdInitAtomicCounters(
    stub();
 }
 
-void VKAPI vkCmdLoadAtomicCounters(
+void anv_CmdLoadAtomicCounters(
     VkCmdBuffer                                 cmdBuffer,
     VkPipelineBindPoint                         pipelineBindPoint,
     uint32_t                                    startCounter,
@@ -2674,7 +3629,7 @@ void VKAPI vkCmdLoadAtomicCounters(
    stub();
 }
 
-void VKAPI vkCmdSaveAtomicCounters(
+void anv_CmdSaveAtomicCounters(
     VkCmdBuffer                                 cmdBuffer,
     VkPipelineBindPoint                         pipelineBindPoint,
     uint32_t                                    startCounter,
@@ -2685,7 +3640,23 @@ void VKAPI vkCmdSaveAtomicCounters(
    stub();
 }
 
-VkResult VKAPI vkCreateFramebuffer(
+/* Destructor callback for framebuffer objects (installed as
+ * framebuffer->base.destructor by anv_CreateFramebuffer).
+ *
+ * The framebuffer owns a dynamic viewport state object that
+ * anv_CreateFramebuffer allocated into fb->vp_state; destroy that
+ * first, then release the framebuffer's own storage.
+ */
+static void
+anv_framebuffer_destroy(struct anv_device *device,
+                        struct anv_object *object,
+                        VkObjectType obj_type)
+{
+   struct anv_framebuffer *fb = (struct anv_framebuffer *)object;
+
+   assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER);
+
+   /* Tear down the implicitly-created viewport/scissor state. */
+   anv_DestroyObject((VkDevice) device,
+                     VK_OBJECT_TYPE_DYNAMIC_VP_STATE,
+                     fb->vp_state);
+
+   anv_device_free(device, fb);
+}
+
+VkResult anv_CreateFramebuffer(
     VkDevice                                    _device,
     const VkFramebufferCreateInfo*              pCreateInfo,
     VkFramebuffer*                              pFramebuffer)
@@ -2693,6 +3664,9 @@ VkResult VKAPI vkCreateFramebuffer(
    struct anv_device *device = (struct anv_device *) _device;
    struct anv_framebuffer *framebuffer;
 
+   static const struct anv_depth_stencil_view null_view =
+      { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
+
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
 
    framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8,
@@ -2700,6 +3674,8 @@ VkResult VKAPI vkCreateFramebuffer(
    if (framebuffer == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   framebuffer->base.destructor = anv_framebuffer_destroy;
+
    framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount;
    for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) {
       framebuffer->color_attachments[i] =
@@ -2709,6 +3685,8 @@ VkResult VKAPI vkCreateFramebuffer(
    if (pCreateInfo->pDepthStencilAttachment) {
       framebuffer->depth_stencil =
          (struct anv_depth_stencil_view *) pCreateInfo->pDepthStencilAttachment->view;
+   } else {
+      framebuffer->depth_stencil = &null_view;
    }
 
    framebuffer->sample_count = pCreateInfo->sampleCount;
@@ -2716,33 +3694,33 @@ VkResult VKAPI vkCreateFramebuffer(
    framebuffer->height = pCreateInfo->height;
    framebuffer->layers = pCreateInfo->layers;
 
-   vkCreateDynamicViewportState((VkDevice) device,
-                                &(VkDynamicVpStateCreateInfo) {
-                                   .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
-                                   .viewportAndScissorCount = 2,
-                                   .pViewports = (VkViewport[]) {
-                                      {
-                                         .originX = 0,
-                                         .originY = 0,
-                                         .width = pCreateInfo->width,
-                                         .height = pCreateInfo->height,
-                                         .minDepth = 0,
-                                         .maxDepth = 1
-                                      },
-                                   },
-                                   .pScissors = (VkRect[]) {
-                                      { {  0,  0 },
-                                        { pCreateInfo->width, pCreateInfo->height } },
-                                   }
-                                },
-                                &framebuffer->vp_state);
+   anv_CreateDynamicViewportState((VkDevice) device,
+      &(VkDynamicVpStateCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
+         .viewportAndScissorCount = 1,
+         .pViewports = (VkViewport[]) {
+            {
+               .originX = 0,
+               .originY = 0,
+               .width = pCreateInfo->width,
+               .height = pCreateInfo->height,
+               .minDepth = 0,
+               .maxDepth = 1
+            },
+         },
+         .pScissors = (VkRect[]) {
+            { {  0,  0 },
+              { pCreateInfo->width, pCreateInfo->height } },
+         }
+      },
+      &framebuffer->vp_state);
 
    *pFramebuffer = (VkFramebuffer) framebuffer;
 
    return VK_SUCCESS;
 }
 
-VkResult VKAPI vkCreateRenderPass(
+VkResult anv_CreateRenderPass(
     VkDevice                                    _device,
     const VkRenderPassCreateInfo*               pCreateInfo,
     VkRenderPass*                               pRenderPass)
@@ -2777,23 +3755,48 @@ VkResult VKAPI vkCreateRenderPass(
    return VK_SUCCESS;
 }
 
-void
-anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer)
+/* Emit the depth/stencil buffer state for the current render pass.
+ *
+ * Programs 3DSTATE_DEPTH_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER (left
+ * disabled), 3DSTATE_STENCIL_BUFFER and 3DSTATE_CLEAR_PARAMS from the
+ * framebuffer's depth_stencil view.  A framebuffer created without a
+ * depth/stencil attachment points at a zero-stride null view, so the
+ * stride > 0 checks below disable both depth and stencil writes in
+ * that case.
+ */
+static void
+anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
+                                  struct anv_render_pass *pass)
 {
-   struct anv_framebuffer *framebuffer = cmd_buffer->framebuffer;
-   struct anv_bindings *bindings = cmd_buffer->bindings;
-
-   for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) {
-      struct anv_surface_view *view = framebuffer->color_attachments[i];
+   const struct anv_depth_stencil_view *view =
+      cmd_buffer->framebuffer->depth_stencil;
 
-      bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = view->surface_state.offset;
-      bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = view->bo;
-      bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = view->offset;
-   }
-   cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY;
-}
+   /* FIXME: Implement the PMA stall W/A */
+   /* FIXME: Width and Height are wrong */
 
-void VKAPI vkCmdBeginRenderPass(
+   /* NOTE(review): dimension fields appear to use a minus-one
+    * encoding (Height/Width/Depth/RenderTargetViewExtent), and
+    * SurfaceQPitch is shifted right by 2 — presumably stored in units
+    * of 4 rows.  Confirm both against the Gen8 PRM.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
+                  .SurfaceType = SURFTYPE_2D,
+                  .DepthWriteEnable = view->depth_stride > 0,
+                  .StencilWriteEnable = view->stencil_stride > 0,
+                  .HierarchicalDepthBufferEnable = false,
+                  .SurfaceFormat = view->depth_format,
+                  .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
+                  .SurfaceBaseAddress = { view->bo,  view->depth_offset },
+                  .Height = pass->render_area.extent.height - 1,
+                  .Width = pass->render_area.extent.width - 1,
+                  .LOD = 0,
+                  .Depth = 1 - 1,
+                  .MinimumArrayElement = 0,
+                  .DepthBufferObjectControlState = GEN8_MOCS,
+                  .RenderTargetViewExtent = 1 - 1,
+                  .SurfaceQPitch = view->depth_qpitch >> 2);
+
+   /* Disable hierarchial depth buffers. */
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
+
+   /* Stencil is a separate surface; enabled only when the view
+    * provides one (stencil_stride > 0).
+    */
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
+                  .StencilBufferEnable = view->stencil_stride > 0,
+                  .StencilBufferObjectControlState = GEN8_MOCS,
+                  .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
+                  .SurfaceBaseAddress = { view->bo, view->stencil_offset },
+                  .SurfaceQPitch = view->stencil_qpitch >> 2);
+
+   /* Clear the clear params. */
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
+}
+
+void anv_CmdBeginRenderPass(
     VkCmdBuffer                                 cmdBuffer,
     const VkRenderPassBegin*                    pRenderPassBegin)
 {
@@ -2804,6 +3807,8 @@ void VKAPI vkCmdBeginRenderPass(
 
    cmd_buffer->framebuffer = framebuffer;
 
+   cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+
    anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
                   .ClippedDrawingRectangleYMin = pass->render_area.offset.y,
                   .ClippedDrawingRectangleXMin = pass->render_area.offset.x,
@@ -2814,12 +3819,12 @@ void VKAPI vkCmdBeginRenderPass(
                   .DrawingRectangleOriginY = 0,
                   .DrawingRectangleOriginX = 0);
 
-   anv_cmd_buffer_fill_render_targets(cmd_buffer);
+   anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass);
 
    anv_cmd_buffer_clear(cmd_buffer, pass);
 }
 
-void VKAPI vkCmdEndRenderPass(
+void anv_CmdEndRenderPass(
     VkCmdBuffer                                 cmdBuffer,
     VkRenderPass                                renderPass)
 {
@@ -2837,6 +3842,41 @@ void VKAPI vkCmdEndRenderPass(
                   .VFCacheInvalidationEnable = true,
                   .TextureCacheInvalidationEnable = true,
                   .CommandStreamerStallEnable = true);
+}
 
-   stub();
+/* No-op stubs for the VK_DBG marker/tagging entry points.
+ *
+ * Unlike the other entry points in this file (which use the anv_
+ * prefix and go through dispatch), these are exported directly under
+ * their vk* names with default symbol visibility, and intentionally
+ * do nothing; vkDbgSetObjectTag just reports success.
+ */
+void vkCmdDbgMarkerBegin(
+    VkCmdBuffer                              cmdBuffer,
+    const char*                                 pMarker)
+   __attribute__ ((visibility ("default")));
+
+void vkCmdDbgMarkerEnd(
+   VkCmdBuffer                              cmdBuffer)
+   __attribute__ ((visibility ("default")));
+
+VkResult vkDbgSetObjectTag(
+    VkDevice                                   device,
+    VkObject                                   object,
+    size_t                                     tagSize,
+    const void*                                pTag)
+   __attribute__ ((visibility ("default")));
+
+
+void vkCmdDbgMarkerBegin(
+    VkCmdBuffer                              cmdBuffer,
+    const char*                                 pMarker)
+{
+}
+
+void vkCmdDbgMarkerEnd(
+    VkCmdBuffer                              cmdBuffer)
+{
+}
+
+VkResult vkDbgSetObjectTag(
+    VkDevice                                   device,
+    VkObject                                   object,
+    size_t                                     tagSize,
+    const void*                                pTag)
+{
+    return VK_SUCCESS;
+}
 }