winsys/drm: check for CAPS2/SM41 support if VGPU10 is enabled
[mesa.git] / src / gallium / winsys / svga / drm / vmw_screen_ioctl.c
index 5d81fa8c4a6de223e58a39df14474dec4711a7b6..739e4ea131fb0959d5345144cda4023f26293e10 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "svgadump/svga_dump.h"
+#include "state_tracker/drm_driver.h"
 #include "vmw_screen.h"
 #include "vmw_context.h"
+#include "vmw_fence.h"
 #include "xf86drm.h"
 #include "vmwgfx_drm.h"
+#include "svga3d_caps.h"
+#include "svga3d_reg.h"
+
+#include "os/os_mman.h"
 
-#include <sys/mman.h>
 #include <errno.h>
 #include <unistd.h>
 
+#define VMW_MAX_DEFAULT_TEXTURE_SIZE   (128 * 1024 * 1024)
+#define VMW_FENCE_TIMEOUT_SECONDS 60
+
+/*
+ * Helpers for joining and splitting 64-bit SVGA3D surface flags, which the
+ * DRM surface ioctls in this file carry as two separate 32-bit halves.
+ * Every macro argument is parenthesized so that compound expressions
+ * (e.g. "a | b") expand with the intended precedence.
+ */
+#define SVGA3D_FLAGS_64(upper32, lower32) \
+   (((uint64_t)(upper32) << 32) | (lower32))
+#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) ((svga3d_flags) >> 32)
+#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \
+   ((svga3d_flags) & ((uint64_t)UINT32_MAX))
+
 struct vmw_region
 {
-   SVGAGuestPtr ptr;
    uint32_t handle;
    uint64_t map_handle;
    void *data;
@@ -57,86 +69,54 @@ struct vmw_region
    uint32_t size;
 };
 
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. In newer versions of the kernel
- * interface the driver uses a special field.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
-static void
-vmw_check_last_cmd(struct vmw_winsys_screen *vws)
+/**
+ * vmw_region_size - Return the size recorded for a region
+ *
+ * @region: Region to query. It is dereferenced unconditionally, so it
+ * must not be NULL.
+ *
+ * Returns the region's size field unchanged.
+ */
+uint32_t
+vmw_region_size(struct vmw_region *region)
 {
-   static uint32_t buffer[16384];
-   struct drm_vmw_fifo_debug_arg arg;
-   int ret;
-
-   return;
-   memset(&arg, 0, sizeof(arg));
-   arg.debug_buffer = (unsigned long)buffer;
-   arg.debug_buffer_size = 65536;
-
-   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FIFO_DEBUG,
-                            &arg, sizeof(arg));
-
-   if (ret) {
-      debug_printf("%s Ioctl error: \"%s\".\n", __FUNCTION__, strerror(-ret));
-      return;
-   }
-
-   if (arg.did_not_fit) {
-      debug_printf("%s Command did not fit completely.\n", __FUNCTION__);
-   }
-
-   svga_dump_commands(buffer, arg.used_size);
-}
-
-static void
-vmw_ioctl_fifo_unmap(struct vmw_winsys_screen *vws, void *mapping)
-{
-   VMW_FUNC;
-   (void)munmap(mapping, getpagesize());
+   return region->size;
 }
 
+/*
+ * The ioctl retry loops in this file compare against -ERESTART. On the
+ * BSD platforms listed below ERESTART is aliased to EINTR.
+ * NOTE(review): presumably these platforms do not expose ERESTART to
+ * userspace -- confirm before extending the list.
+ */
+#if defined(__DragonFly__) || defined(__FreeBSD__) || \
+    defined(__NetBSD__) || defined(__OpenBSD__)
+#define ERESTART EINTR
+#endif
 
-static void *
-vmw_ioctl_fifo_map(struct vmw_winsys_screen *vws,
-                   uint32_t fifo_offset )
+/**
+ * vmw_ioctl_context_create - Create a context through DRM_VMW_CREATE_CONTEXT
+ *
+ * @vws: Winsys screen whose DRM file descriptor is used for the ioctl.
+ *
+ * Returns the context id reported by the kernel. On ioctl failure the
+ * function returns -1 converted to the uint32 return type.
+ */
+uint32
+vmw_ioctl_context_create(struct vmw_winsys_screen *vws)
 {
-   void *map;
+   struct drm_vmw_context_arg c_arg;
+   int ret;
 
    VMW_FUNC;
 
-   map = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED,
-             vws->ioctl.drm_fd, fifo_offset);
-
-   if (map == MAP_FAILED) {
-      debug_printf("Map failed %s\n", strerror(errno));
-      return NULL;
-   }
+   /* Read-only ioctl: c_arg is filled in by the kernel, so it is not
+    * initialized beforehand. */
+   ret = drmCommandRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_CONTEXT,
+                       &c_arg, sizeof(c_arg));
 
-   vmw_printf("Fifo (min) is 0x%08x\n", ((uint32_t *) map)[SVGA_FIFO_MIN]);
+   if (ret)
+      return -1;
 
-   return map;
+   vmw_printf("Context id is %d\n", c_arg.cid);
+   return c_arg.cid;
 }
 
+/**
+ * vmw_ioctl_extended_context_create - Create a legacy or VGPU10 context
+ *
+ * @vws: Winsys screen whose DRM file descriptor is used for the ioctl.
+ * @vgpu10: If TRUE request a drm_vmw_context_vgpu10 context, otherwise a
+ * drm_vmw_context_legacy one.
+ *
+ * Uses DRM_VMW_CREATE_EXTENDED_CONTEXT, whose argument is a union of a
+ * request (req) and a reply (rep).
+ *
+ * Returns the context id from the reply. On ioctl failure the function
+ * returns -1 converted to the uint32 return type.
+ */
 uint32
-vmw_ioctl_context_create(struct vmw_winsys_screen *vws)
+vmw_ioctl_extended_context_create(struct vmw_winsys_screen *vws,
+                                  boolean vgpu10)
 {
-   struct drm_vmw_context_arg c_arg;
+   union drm_vmw_extended_context_arg c_arg;
    int ret;
 
    VMW_FUNC;
-
-   ret = drmCommandRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_CONTEXT,
-                       &c_arg, sizeof(c_arg));
+   memset(&c_arg, 0, sizeof(c_arg));
+   c_arg.req = (vgpu10 ? drm_vmw_context_vgpu10 : drm_vmw_context_legacy);
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd,
+                             DRM_VMW_CREATE_EXTENDED_CONTEXT,
+                             &c_arg, sizeof(c_arg));
 
    if (ret)
       return -1;
 
-   vmw_check_last_cmd(vws);
-   vmw_printf("Context id is %d\n", c_arg.cid);
-
-   return c_arg.cid;
+   /* The id lives in the reply member of the union; the union itself has
+    * no cid field, so the debug print must go through rep as well. */
+   vmw_printf("Context id is %d\n", c_arg.rep.cid);
+   return c_arg.rep.cid;
 }
 
 void
@@ -152,15 +132,16 @@ vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws, uint32 cid)
    (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_CONTEXT,
                         &c_arg, sizeof(c_arg));
 
-   vmw_check_last_cmd(vws);
 }
 
 uint32
 vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
-                             SVGA3dSurfaceFlags flags,
-                             SVGA3dSurfaceFormat format,
-                             SVGA3dSize size,
-                             uint32_t numFaces, uint32_t numMipLevels)
+                         SVGA3dSurface1Flags flags,
+                         SVGA3dSurfaceFormat format,
+                         unsigned usage,
+                         SVGA3dSize size,
+                         uint32_t numFaces, uint32_t numMipLevels,
+                         unsigned sampleCount)
 {
    union drm_vmw_surface_create_arg s_arg;
    struct drm_vmw_surface_create_req *req = &s_arg.req;
@@ -175,19 +156,10 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
    vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format);
 
    memset(&s_arg, 0, sizeof(s_arg));
-   if (vws->use_old_scanout_flag &&
-       (flags & SVGA3D_SURFACE_HINT_SCANOUT)) {
-      req->flags = (uint32_t) flags;
-      req->scanout = false;
-   } else if (flags & SVGA3D_SURFACE_HINT_SCANOUT) {
-      req->flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT);
-      req->scanout = true;
-   } else {
-      req->flags = (uint32_t) flags;
-      req->scanout = false;
-   }
+   req->flags = (uint32_t) flags;
+   req->scanout = !!(usage & SVGA_SURFACE_USAGE_SCANOUT);
    req->format = (uint32_t) format;
-   req->shareable = 1;
+   req->shareable = !!(usage & SVGA_SURFACE_USAGE_SHARED);
 
    assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
          DRM_VMW_MAX_MIP_LEVELS);
@@ -219,11 +191,302 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
       return -1;
 
    vmw_printf("Surface id is %d\n", rep->sid);
-   vmw_check_last_cmd(vws);
 
    return rep->sid;
 }
 
+
+/**
+ * vmw_ioctl_gb_surface_create - Create a guest-backed surface
+ *
+ * @vws: Winsys screen
+ * @flags: SVGA3D surface flags. The upper 32 bits are only passed to the
+ * kernel when the extended ioctl (have_drm_2_15) is used.
+ * @format: Surface format
+ * @usage: SVGA_SURFACE_USAGE_* bits; SCANOUT and SHARED are translated to
+ * the corresponding drm surface flags.
+ * @size: Base level size
+ * @numFaces: Passed as the array size when VGPU10 is available
+ * @numMipLevels: Number of mip levels
+ * @sampleCount: Multisample count, only used when VGPU10 is available
+ * @buffer_handle: Handle of an existing backup buffer, or 0 to pass
+ * SVGA3D_INVALID_ID to the kernel.
+ * @multisamplePattern: Only used by the extended ioctl
+ * @qualityLevel: Only used by the extended ioctl
+ * @p_region: If non-NULL, receives a newly allocated struct vmw_region
+ * describing the surface backup buffer on success.
+ *
+ * Returns the surface handle on success, SVGA3D_INVALID_ID on failure.
+ */
+uint32
+vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
+                            SVGA3dSurfaceAllFlags flags,
+                            SVGA3dSurfaceFormat format,
+                            unsigned usage,
+                            SVGA3dSize size,
+                            uint32_t numFaces,
+                            uint32_t numMipLevels,
+                            unsigned sampleCount,
+                            uint32_t buffer_handle,
+                            SVGA3dMSPattern multisamplePattern,
+                            SVGA3dMSQualityLevel qualityLevel,
+                            struct vmw_region **p_region)
+{
+   /*
+    * The reply is copied into function scope. The ioctl argument unions
+    * are local to the branches below, so a pointer into them must not be
+    * used once the branch has ended.
+    */
+   struct drm_vmw_gb_surface_create_rep rep;
+   struct vmw_region *region = NULL;
+   int ret;
+
+   vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format);
+
+   if (p_region) {
+      region = CALLOC_STRUCT(vmw_region);
+      if (!region)
+         return SVGA3D_INVALID_ID;
+   }
+
+   if (vws->ioctl.have_drm_2_15) {
+      union drm_vmw_gb_surface_create_ext_arg s_arg;
+      struct drm_vmw_gb_surface_create_ext_req *req = &s_arg.req;
+
+      memset(&s_arg, 0, sizeof(s_arg));
+
+      req->version = drm_vmw_gb_surface_v1;
+      req->multisample_pattern = multisamplePattern;
+      req->quality_level = qualityLevel;
+      req->must_be_zero = 0;
+      req->base.svga3d_flags = SVGA3D_FLAGS_LOWER_32(flags);
+      req->svga3d_flags_upper_32_bits = SVGA3D_FLAGS_UPPER_32(flags);
+      req->base.format = (uint32_t) format;
+
+      if (usage & SVGA_SURFACE_USAGE_SCANOUT)
+         req->base.drm_surface_flags |= drm_vmw_surface_flag_scanout;
+
+      if (usage & SVGA_SURFACE_USAGE_SHARED)
+         req->base.drm_surface_flags |= drm_vmw_surface_flag_shareable;
+
+      req->base.drm_surface_flags |= drm_vmw_surface_flag_create_buffer;
+      req->base.base_size.width = size.width;
+      req->base.base_size.height = size.height;
+      req->base.base_size.depth = size.depth;
+      req->base.mip_levels = numMipLevels;
+      req->base.multisample_count = 0;
+      req->base.autogen_filter = SVGA3D_TEX_FILTER_NONE;
+
+      if (vws->base.have_vgpu10) {
+         req->base.array_size = numFaces;
+         req->base.multisample_count = sampleCount;
+      } else {
+         assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
+               DRM_VMW_MAX_MIP_LEVELS);
+         req->base.array_size = 0;
+      }
+
+      req->base.buffer_handle = buffer_handle ?
+         buffer_handle : SVGA3D_INVALID_ID;
+
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd,
+                                DRM_VMW_GB_SURFACE_CREATE_EXT, &s_arg,
+                                sizeof(s_arg));
+
+      if (ret)
+         goto out_fail_create;
+
+      /* Save the reply before s_arg goes out of scope. */
+      rep = s_arg.rep;
+   } else {
+      union drm_vmw_gb_surface_create_arg s_arg;
+      struct drm_vmw_gb_surface_create_req *req = &s_arg.req;
+
+      memset(&s_arg, 0, sizeof(s_arg));
+      /* The legacy request only carries the lower 32 flag bits. */
+      req->svga3d_flags = (uint32_t) flags;
+      req->format = (uint32_t) format;
+
+      if (usage & SVGA_SURFACE_USAGE_SCANOUT)
+         req->drm_surface_flags |= drm_vmw_surface_flag_scanout;
+
+      if (usage & SVGA_SURFACE_USAGE_SHARED)
+         req->drm_surface_flags |= drm_vmw_surface_flag_shareable;
+
+      req->drm_surface_flags |= drm_vmw_surface_flag_create_buffer;
+      req->base_size.width = size.width;
+      req->base_size.height = size.height;
+      req->base_size.depth = size.depth;
+      req->mip_levels = numMipLevels;
+      req->multisample_count = 0;
+      req->autogen_filter = SVGA3D_TEX_FILTER_NONE;
+
+      if (vws->base.have_vgpu10) {
+         req->array_size = numFaces;
+         req->multisample_count = sampleCount;
+      } else {
+         assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
+               DRM_VMW_MAX_MIP_LEVELS);
+         req->array_size = 0;
+      }
+
+      req->buffer_handle = buffer_handle ?
+         buffer_handle : SVGA3D_INVALID_ID;
+
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GB_SURFACE_CREATE,
+                               &s_arg, sizeof(s_arg));
+
+      if (ret)
+         goto out_fail_create;
+
+      /* Save the reply before s_arg goes out of scope. */
+      rep = s_arg.rep;
+   }
+
+   if (p_region) {
+      region->handle = rep.buffer_handle;
+      region->map_handle = rep.buffer_map_handle;
+      region->drm_fd = vws->ioctl.drm_fd;
+      region->size = rep.backup_size;
+      *p_region = region;
+   }
+
+   /* Print the same field that is returned to the caller. */
+   vmw_printf("Surface id is %d\n", rep.handle);
+   return rep.handle;
+
+out_fail_create:
+   /* region is NULL when no region was requested. */
+   FREE(region);
+   return SVGA3D_INVALID_ID;
+}
+
+/**
+ * vmw_ioctl_surface_req - Translate a winsys handle into a surface request
+ *
+ * @vws: Winsys screen
+ * @whandle: Handle describing the surface to look up
+ * @req: Surface request that is filled in on success
+ * @needs_unref: Set to TRUE when this call obtained a kernel surface
+ * reference that needs to be unreferenced again, FALSE otherwise.
+ *
+ * Shared and KMS handles are passed through as legacy surface ids. FD
+ * handles are passed as prime handles when have_drm_2_6 is set, and are
+ * otherwise converted with drmPrimeFDToHandle().
+ *
+ * Returns 0 on success and -EINVAL on failure, in which case neither
+ * @req nor @needs_unref is written.
+ */
+static int
+vmw_ioctl_surface_req(const struct vmw_winsys_screen *vws,
+                      const struct winsys_handle *whandle,
+                      struct drm_vmw_surface_arg *req,
+                      boolean *needs_unref)
+{
+   const boolean from_fd = (whandle->type == WINSYS_HANDLE_TYPE_FD);
+
+   /* Reject anything that is neither an fd nor a shared/KMS handle. */
+   if (!from_fd &&
+       whandle->type != WINSYS_HANDLE_TYPE_SHARED &&
+       whandle->type != WINSYS_HANDLE_TYPE_KMS) {
+      vmw_error("Attempt to import unsupported handle type %d.\n",
+                whandle->type);
+      return -EINVAL;
+   }
+
+   /* Without have_drm_2_6 the fd is first converted to a legacy handle. */
+   if (from_fd && !vws->ioctl.have_drm_2_6) {
+      uint32_t prime_handle;
+
+      if (drmPrimeFDToHandle(vws->ioctl.drm_fd, whandle->handle,
+                             &prime_handle)) {
+         vmw_error("Failed to get handle from prime fd %d.\n",
+                   (int) whandle->handle);
+         return -EINVAL;
+      }
+
+      *needs_unref = TRUE;
+      req->handle_type = DRM_VMW_HANDLE_LEGACY;
+      req->sid = prime_handle;
+      return 0;
+   }
+
+   /* Remaining cases hand the winsys handle value to the kernel as is. */
+   *needs_unref = FALSE;
+   req->handle_type = from_fd ? DRM_VMW_HANDLE_PRIME : DRM_VMW_HANDLE_LEGACY;
+   req->sid = whandle->handle;
+   return 0;
+}
+
+/**
+ * vmw_ioctl_gb_surface_ref - Put a reference on a guest-backed surface and
+ * get surface information
+ *
+ * @vws: Screen to register the reference on
+ * @whandle: Winsys handle identifying the guest-backed surface
+ * @flags: Receives the flags used when the surface was created
+ * @format: Receives the format used when the surface was created
+ * @numMipLevels: Receives the number of mipmap levels of the surface
+ * @handle: Receives the kernel handle of the surface. It is written
+ * before the reference ioctl is issued and overwritten with the handle
+ * from the ioctl reply on success.
+ * @p_region: On successful return points to a newly allocated
+ * struct vmw_region holding a reference to the surface backup buffer.
+ * Must not be NULL.
+ *
+ * Returns 0 on success, -ENOMEM if the region cannot be allocated, and
+ * otherwise the error from vmw_ioctl_surface_req() or from the ioctl.
+ */
+int
+vmw_ioctl_gb_surface_ref(struct vmw_winsys_screen *vws,
+                         const struct winsys_handle *whandle,
+                         SVGA3dSurfaceAllFlags *flags,
+                         SVGA3dSurfaceFormat *format,
+                         uint32_t *numMipLevels,
+                         uint32_t *handle,
+                         struct vmw_region **p_region)
+{
+   struct vmw_region *region = NULL;
+   boolean needs_unref = FALSE;
+   int ret;
+
+   assert(p_region != NULL);
+   region = CALLOC_STRUCT(vmw_region);
+   if (!region)
+      return -ENOMEM;
+
+   if (vws->ioctl.have_drm_2_15) {
+      union drm_vmw_gb_surface_reference_ext_arg s_arg;
+      struct drm_vmw_surface_arg *req = &s_arg.req;
+      struct drm_vmw_gb_surface_ref_ext_rep *rep = &s_arg.rep;
+
+      memset(&s_arg, 0, sizeof(s_arg));
+      ret = vmw_ioctl_surface_req(vws, whandle, req, &needs_unref);
+      if (ret)
+         goto out_fail_req;
+
+      /* Record the id now so that the failure path below can drop the
+       * reference taken by vmw_ioctl_surface_req(). */
+      *handle = req->sid;
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GB_SURFACE_REF_EXT,
+                               &s_arg, sizeof(s_arg));
+
+      if (ret)
+         goto out_fail_ref;
+
+      region->handle = rep->crep.buffer_handle;
+      region->map_handle = rep->crep.buffer_map_handle;
+      region->drm_fd = vws->ioctl.drm_fd;
+      region->size = rep->crep.backup_size;
+      *p_region = region;
+
+      *handle = rep->crep.handle;
+      /* The extended reply carries the 64-bit flags as two 32-bit halves. */
+      *flags = SVGA3D_FLAGS_64(rep->creq.svga3d_flags_upper_32_bits,
+                               rep->creq.base.svga3d_flags);
+      *format = rep->creq.base.format;
+      *numMipLevels = rep->creq.base.mip_levels;
+   } else {
+      union drm_vmw_gb_surface_reference_arg s_arg;
+      struct drm_vmw_surface_arg *req = &s_arg.req;
+      struct drm_vmw_gb_surface_ref_rep *rep = &s_arg.rep;
+
+      memset(&s_arg, 0, sizeof(s_arg));
+      ret = vmw_ioctl_surface_req(vws, whandle, req, &needs_unref);
+      if (ret)
+         goto out_fail_req;
+
+      /* Record the id now so that the failure path below can drop the
+       * reference taken by vmw_ioctl_surface_req(). */
+      *handle = req->sid;
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GB_SURFACE_REF,
+                               &s_arg, sizeof(s_arg));
+
+      if (ret)
+         goto out_fail_ref;
+
+      region->handle = rep->crep.buffer_handle;
+      region->map_handle = rep->crep.buffer_map_handle;
+      region->drm_fd = vws->ioctl.drm_fd;
+      region->size = rep->crep.backup_size;
+      *p_region = region;
+
+      *handle = rep->crep.handle;
+      *flags = rep->creq.svga3d_flags;
+      *format = rep->creq.format;
+      *numMipLevels = rep->creq.mip_levels;
+   }
+
+   vmw_printf("%s flags %d format %d\n", __FUNCTION__, *flags, *format);
+
+   /* Drop the extra reference taken while resolving the winsys handle. */
+   if (needs_unref)
+      vmw_ioctl_surface_destroy(vws, *handle);
+
+   return 0;
+out_fail_ref:
+   if (needs_unref)
+      vmw_ioctl_surface_destroy(vws, *handle);
+out_fail_req:
+   FREE(region);
+   return ret;
+}
+
 void
 vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws, uint32 sid)
 {
@@ -236,17 +499,18 @@ vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws, uint32 sid)
 
    (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_SURFACE,
                         &s_arg, sizeof(s_arg));
-   vmw_check_last_cmd(vws);
-
 }
 
 void
-vmw_ioctl_command(struct vmw_winsys_screen *vws, void *commands, uint32_t size,
-                      uint32_t * pfence)
+vmw_ioctl_command(struct vmw_winsys_screen *vws, int32_t cid,
+                  uint32_t throttle_us, void *commands, uint32_t size,
+                  struct pipe_fence_handle **pfence, int32_t imported_fence_fd,
+                  uint32_t flags)
 {
    struct drm_vmw_execbuf_arg arg;
    struct drm_vmw_fence_rep rep;
    int ret;
+   int argsize;
 
 #ifdef DEBUG
    {
@@ -271,37 +535,69 @@ vmw_ioctl_command(struct vmw_winsys_screen *vws, void *commands, uint32_t size,
    memset(&arg, 0, sizeof(arg));
    memset(&rep, 0, sizeof(rep));
 
+   if (flags & SVGA_HINT_FLAG_EXPORT_FENCE_FD) {
+      arg.flags |= DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD;
+   }
+
+   if (imported_fence_fd != -1) {
+      arg.flags |= DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD;
+   }
+
    rep.error = -EFAULT;
-   arg.fence_rep = (unsigned long)&rep;
+   if (pfence)
+      arg.fence_rep = (unsigned long)&rep;
    arg.commands = (unsigned long)commands;
    arg.command_size = size;
-
+   arg.throttle_us = throttle_us;
+   arg.version = vws->ioctl.drm_execbuf_version;
+   arg.context_handle = (vws->base.have_vgpu10 ? cid : SVGA3D_INVALID_ID);
+
+   /* Older DRM module requires this to be zero */
+   if (vws->base.have_fence_fd)
+      arg.imported_fence_fd = imported_fence_fd;
+
+   /* In DRM_VMW_EXECBUF_VERSION 1, the drm_vmw_execbuf_arg structure ends with
+    * the flags field. The structure size sent to drmCommandWrite must match
+    * the drm_execbuf_version. Otherwise, an invalid value will be returned.
+    */
+   argsize = vws->ioctl.drm_execbuf_version > 1 ? sizeof(arg) :
+                offsetof(struct drm_vmw_execbuf_arg, context_handle);
    do {
-       ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, sizeof(arg));
+       ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, argsize);
    } while(ret == -ERESTART);
    if (ret) {
-      debug_printf("%s error %s.\n", __FUNCTION__, strerror(-ret));
+      vmw_error("%s error %s.\n", __FUNCTION__, strerror(-ret));
+      abort();
    }
+
    if (rep.error) {
 
       /*
-       * Kernel has synced and put the last fence sequence in the FIFO
-       * register.
+       * Kernel has already synced, or caller requested no fence.
        */
-
-      if (rep.error == -EFAULT)
-        rep.fence_seq = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
-
-      debug_printf("%s Fence error %s.\n", __FUNCTION__,
-                  strerror(-rep.error));
+      if (pfence)
+        *pfence = NULL;
+   } else {
+      if (pfence) {
+         vmw_fences_signal(vws->fence_ops, rep.passed_seqno, rep.seqno,
+                           TRUE);
+
+         /* Older DRM module will set this to zero, but -1 is the proper FD
+          * to use for no Fence FD support */
+         if (!vws->base.have_fence_fd)
+            rep.fd = -1;
+
+         *pfence = vmw_fence_create(vws->fence_ops, rep.handle,
+                                    rep.seqno, rep.mask, rep.fd);
+         if (*pfence == NULL) {
+            /*
+             * Fence creation failed. Need to sync.
+             */
+            (void) vmw_ioctl_fence_finish(vws, rep.handle, rep.mask);
+            vmw_ioctl_fence_unref(vws, rep.handle);
+         }
+      }
    }
-
-   vws->ioctl.last_fence = rep.fence_seq;
-
-   if (pfence)
-      *pfence = rep.fence_seq;
-   vmw_check_last_cmd(vws);
-
 }
 
 
@@ -328,12 +624,10 @@ vmw_ioctl_region_create(struct vmw_winsys_screen *vws, uint32_t size)
    } while (ret == -ERESTART);
 
    if (ret) {
-      debug_printf("IOCTL failed %d: %s\n", ret, strerror(-ret));
+      vmw_error("IOCTL failed %d: %s\n", ret, strerror(-ret));
       goto out_err1;
    }
 
-   region->ptr.gmrId = rep->cur_gmr_id;
-   region->ptr.offset = rep->cur_gmr_offset;
    region->data = NULL;
    region->handle = rep->handle;
    region->map_handle = rep->map_handle;
@@ -360,7 +654,7 @@ vmw_ioctl_region_destroy(struct vmw_region *region)
               region->ptr.gmrId, region->ptr.offset);
 
    if (region->data) {
-      munmap(region->data, region->size);
+      os_munmap(region->data, region->size);
       region->data = NULL;
    }
 
@@ -374,7 +668,8 @@ vmw_ioctl_region_destroy(struct vmw_region *region)
 SVGAGuestPtr
 vmw_ioctl_region_ptr(struct vmw_region *region)
 {
-   return region->ptr;
+   /*
+    * The region no longer stores an SVGAGuestPtr, so one is built on the
+    * fly from the region handle with a zero second member.
+    * NOTE(review): presumably this maps to gmrId = handle, offset = 0;
+    * the SVGAGuestPtr field order is not visible here -- confirm.
+    */
+   SVGAGuestPtr ptr = {region->handle, 0};
+   return ptr;
 }
 
 void *
@@ -386,10 +681,10 @@ vmw_ioctl_region_map(struct vmw_region *region)
               region->ptr.gmrId, region->ptr.offset);
 
    if (region->data == NULL) {
-      map = mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+      map = os_mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                 region->drm_fd, region->map_handle);
       if (map == MAP_FAILED) {
-        debug_printf("%s: Map failed.\n", __FUNCTION__);
+        vmw_error("%s: Map failed.\n", __FUNCTION__);
         return NULL;
       }
 
@@ -409,111 +704,490 @@ vmw_ioctl_region_unmap(struct vmw_region *region)
    --region->map_count;
 }
 
-
+/**
+ * vmw_ioctl_syncforcpu - Synchronize a buffer object for CPU usage
+ *
+ * @region: Pointer to a struct vmw_region representing the buffer object.
+ * @dont_block: Don't wait for GPU idle, but rather return -EBUSY if the
+ * GPU is busy with the buffer object.
+ * @readonly: Hint that the CPU access is read-only.
+ * @allow_cs: Allow concurrent command submission while the buffer is
+ * synchronized for CPU. If FALSE command submissions referencing the
+ * buffer will block until a corresponding call to vmw_ioctl_releasefromcpu.
+ *
+ * This function idles any GPU activities touching the buffer and blocks
+ * command submission of commands referencing the buffer, even from
+ * other processes.
+ *
+ * Returns the result of the DRM_VMW_SYNCCPU ioctl.
+ */
 int
-vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws,
-                          uint32_t fence)
+vmw_ioctl_syncforcpu(struct vmw_region *region,
+                     boolean dont_block,
+                     boolean readonly,
+                     boolean allow_cs)
 {
-   uint32_t expected;
-   uint32_t current;
-   
-   assert(fence);
-   if(!fence)
-      return 0;
-   
-   expected = fence;
-   current = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
-   
-   if ((int32)(current - expected) >= 0)
-      return 0; /* fence passed */
-   else
-      return -1;
+   struct drm_vmw_synccpu_arg arg;
+
+   memset(&arg, 0, sizeof(arg));
+   arg.op = drm_vmw_synccpu_grab;
+   arg.handle = region->handle;
+   /* Read access is always requested; the other bits are optional. */
+   arg.flags = drm_vmw_synccpu_read;
+   if (!readonly)
+      arg.flags |= drm_vmw_synccpu_write;
+   if (dont_block)
+      arg.flags |= drm_vmw_synccpu_dontblock;
+   if (allow_cs)
+      arg.flags |= drm_vmw_synccpu_allow_cs;
+
+   return drmCommandWrite(region->drm_fd, DRM_VMW_SYNCCPU, &arg, sizeof(arg));
 }
 
+/**
+ * vmw_ioctl_releasefromcpu - Undo a previous syncforcpu.
+ *
+ * @region: Pointer to a struct vmw_region representing the buffer object.
+ * @readonly: Should hold the same value as the matching syncforcpu call.
+ * @allow_cs: Should hold the same value as the matching syncforcpu call.
+ *
+ * Issues DRM_VMW_SYNCCPU with the release operation. The ioctl result is
+ * deliberately discarded, so failures are not reported to the caller.
+ */
+void
+vmw_ioctl_releasefromcpu(struct vmw_region *region,
+                         boolean readonly,
+                         boolean allow_cs)
+{
+   struct drm_vmw_synccpu_arg arg;
+
+   memset(&arg, 0, sizeof(arg));
+   arg.op = drm_vmw_synccpu_release;
+   arg.handle = region->handle;
+   /* Rebuild the same read/write/allow_cs bits used by the grab. */
+   arg.flags = drm_vmw_synccpu_read;
+   if (!readonly)
+      arg.flags |= drm_vmw_synccpu_write;
+   if (allow_cs)
+      arg.flags |= drm_vmw_synccpu_allow_cs;
 
-static void
-vmw_ioctl_sync(struct vmw_winsys_screen *vws, 
-                   uint32_t fence)
+   (void) drmCommandWrite(region->drm_fd, DRM_VMW_SYNCCPU, &arg, sizeof(arg));
+}
+
+/**
+ * vmw_ioctl_fence_unref - Issue DRM_VMW_FENCE_UNREF for a fence handle
+ *
+ * @vws: Winsys screen whose DRM file descriptor is used for the ioctl.
+ * @handle: Fence handle passed to the kernel.
+ *
+ * A failing ioctl is reported through vmw_error() only; nothing is
+ * returned to the caller.
+ */
+void
+vmw_ioctl_fence_unref(struct vmw_winsys_screen *vws,
+                     uint32_t handle)
 {
-   uint32_t cur_fence;
-   struct drm_vmw_fence_wait_arg arg;
+   struct drm_vmw_fence_arg arg;
    int ret;
+   
+   memset(&arg, 0, sizeof(arg));
+   arg.handle = handle;
+
+   ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_FENCE_UNREF,
+                        &arg, sizeof(arg));
+   if (ret != 0)
+      vmw_error("%s Failed\n", __FUNCTION__);
+}
 
-   vmw_printf("%s: fence = %lu\n", __FUNCTION__,
-              (unsigned long)fence);
+/**
+ * vmw_drm_fence_flags - Translate SVGA fence flags to DRM fence flags
+ *
+ * @flags: Bitmask of SVGA_FENCE_FLAG_* values.
+ *
+ * Returns a bitmask with DRM_VMW_FENCE_FLAG_EXEC and/or
+ * DRM_VMW_FENCE_FLAG_QUERY set for the corresponding SVGA bits; all other
+ * input bits are ignored.
+ */
+static inline uint32_t
+vmw_drm_fence_flags(uint32_t flags)
+{
+   const uint32_t exec_bit =
+      (flags & SVGA_FENCE_FLAG_EXEC) ? DRM_VMW_FENCE_FLAG_EXEC : 0;
+   const uint32_t query_bit =
+      (flags & SVGA_FENCE_FLAG_QUERY) ? DRM_VMW_FENCE_FLAG_QUERY : 0;
+
+   return exec_bit | query_bit;
+}
 
-   cur_fence = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
-   vmw_printf("%s: Fence id read is 0x%08x\n", __FUNCTION__,
-              (unsigned int)cur_fence);
 
-   if ((cur_fence - fence) < (1 << 24))
-      return;
+/**
+ * vmw_ioctl_fence_signalled - Ask the kernel whether a fence has signaled
+ *
+ * @vws: Winsys screen
+ * @handle: Fence handle to query
+ * @flags: SVGA_FENCE_FLAG_* bits, translated with vmw_drm_fence_flags()
+ *
+ * On a successful ioctl the passed sequence number from the reply is
+ * forwarded to vmw_fences_signal().
+ *
+ * Returns 0 if the fence has signaled, -1 if it has not, and the ioctl
+ * error code if the query itself failed.
+ */
+int
+vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws,
+                         uint32_t handle,
+                         uint32_t flags)
+{
+   struct drm_vmw_fence_signaled_arg arg;
+   uint32_t vflags = vmw_drm_fence_flags(flags);
+   int ret;
 
    memset(&arg, 0, sizeof(arg));
-   arg.sequence = fence;
+   arg.handle = handle;
+   arg.flags = vflags;
 
-   do {
-       ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FENCE_WAIT, &arg,
-                                sizeof(arg));
-   } while (ret == -ERESTART);
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FENCE_SIGNALED,
+                            &arg, sizeof(arg));
+
+   if (ret != 0)
+      return ret;
+
+   vmw_fences_signal(vws->fence_ops, arg.passed_seqno, 0, FALSE);
+
+   return (arg.signaled) ? 0 : -1;
 }
 
 
+
+/**
+ * vmw_ioctl_fence_finish - Wait for a fence through DRM_VMW_FENCE_WAIT
+ *
+ * @vws: Winsys screen
+ * @handle: Fence handle to wait for
+ * @flags: SVGA_FENCE_FLAG_* bits, translated with vmw_drm_fence_flags()
+ *
+ * The wait uses a timeout of VMW_FENCE_TIMEOUT_SECONDS and is not lazy.
+ *
+ * Always returns 0. A failing ioctl is only reported through vmw_error().
+ * NOTE(review): callers cannot tell a failed or timed-out wait from a
+ * completed one -- confirm this is intended before relying on the result.
+ */
 int
 vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws,
-                       uint32_t fence)
+                       uint32_t handle,
+                      uint32_t flags)
 {
-   assert(fence);
+   struct drm_vmw_fence_wait_arg arg;
+   uint32_t vflags = vmw_drm_fence_flags(flags);
+   int ret;
+
+   memset(&arg, 0, sizeof(arg));
+
+   arg.handle = handle;
+   /* The kernel interface takes the timeout in microseconds. */
+   arg.timeout_us = VMW_FENCE_TIMEOUT_SECONDS*1000000;
+   arg.lazy = 0;
+   arg.flags = vflags;
+
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FENCE_WAIT,
+                            &arg, sizeof(arg));
+
+   if (ret != 0)
+      vmw_error("%s Failed\n", __FUNCTION__);
    
-   if(fence) {
-      if(vmw_ioctl_fence_signalled(vws, fence) != 0) {
-         vmw_ioctl_sync(vws, fence);
+   return 0;
+}
+
+/**
+ * vmw_ioctl_shader_create - Create a shader through DRM_VMW_CREATE_SHADER
+ *
+ * @vws: Winsys screen whose DRM file descriptor is used for the ioctl.
+ * @type: SVGA3D_SHADERTYPE_VS or SVGA3D_SHADERTYPE_PS; any other value
+ * trips an assertion.
+ * @code_len: Passed to the kernel as the shader size.
+ *
+ * Returns the shader handle from the ioctl reply, or SVGA3D_INVALID_ID if
+ * the ioctl fails.
+ */
+uint32
+vmw_ioctl_shader_create(struct vmw_winsys_screen *vws,
+                       SVGA3dShaderType type,
+                       uint32 code_len)
+{
+   struct drm_vmw_shader_create_arg create_arg;
+
+   VMW_FUNC;
+
+   memset(&create_arg, 0, sizeof(create_arg));
+
+   create_arg.size = code_len;
+   create_arg.buffer_handle = SVGA3D_INVALID_ID;
+   create_arg.shader_handle = SVGA3D_INVALID_ID;
+
+   /* Map the SVGA3D shader type onto the DRM one. */
+   if (type == SVGA3D_SHADERTYPE_VS) {
+      create_arg.shader_type = drm_vmw_shader_type_vs;
+   } else if (type == SVGA3D_SHADERTYPE_PS) {
+      create_arg.shader_type = drm_vmw_shader_type_ps;
+   } else {
+      assert(!"Invalid shader type.");
+   }
+
+   if (drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_SHADER,
+                           &create_arg, sizeof(create_arg)))
+      return SVGA3D_INVALID_ID;
+
+   return create_arg.shader_handle;
+}
+
+/**
+ * vmw_ioctl_shader_destroy - Issue DRM_VMW_UNREF_SHADER for a shader
+ *
+ * @vws: Winsys screen whose DRM file descriptor is used for the ioctl.
+ * @shid: Shader handle passed to the kernel.
+ *
+ * The ioctl result is deliberately discarded.
+ */
+void
+vmw_ioctl_shader_destroy(struct vmw_winsys_screen *vws, uint32 shid)
+{
+   struct drm_vmw_shader_arg unref_arg;
+
+   VMW_FUNC;
+
+   /* Zero the whole argument first, then fill in the handle. */
+   memset(&unref_arg, 0, sizeof(unref_arg));
+   unref_arg.handle = shid;
+
+   (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_SHADER,
+                         &unref_arg, sizeof(unref_arg));
+}
+
+/**
+ * vmw_ioctl_parse_caps - Fill vws->ioctl.cap_3d[] from a 3D caps buffer
+ *
+ * @vws: Winsys screen whose cap_3d table (num_cap_3d entries) is updated.
+ * @cap_buffer: Caps data to parse.
+ *
+ * With guest-backed objects the buffer is read as a flat array: entry i
+ * becomes the value of cap i and every cap is marked present.
+ *
+ * Otherwise the buffer is walked as a sequence of records, advancing by
+ * the 32-bit word count stored at the start of each record until a zero
+ * count is found. The record with the highest type within the
+ * SVGA3DCAPS_RECORD_DEVCAPS_MIN..MAX range is used; its data is read as
+ * (index, value) pairs. Indices outside the cap_3d table are reported
+ * with debug_printf() and skipped.
+ *
+ * Returns 0 on success, -1 if no devcaps record was found.
+ */
+static int
+vmw_ioctl_parse_caps(struct vmw_winsys_screen *vws,
+                    const uint32_t *cap_buffer)
+{
+   int i;
+
+   if (vws->base.have_gb_objects) {
+      for (i = 0; i < vws->ioctl.num_cap_3d; ++i) {
+        vws->ioctl.cap_3d[i].has_cap = TRUE;
+        vws->ioctl.cap_3d[i].result.u = cap_buffer[i];
+      }
+      return 0;
+   } else {
+      const uint32 *capsBlock;
+      const SVGA3dCapsRecord *capsRecord = NULL;
+      uint32 offset;
+      const SVGA3dCapPair *capArray;
+      int numCaps, index;
+
+      /*
+       * Search linearly through the caps block records for the specified type.
+       */
+      capsBlock = cap_buffer;
+      for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) {
+        const SVGA3dCapsRecord *record;
+        assert(offset < SVGA_FIFO_3D_CAPS_SIZE);
+        record = (const SVGA3dCapsRecord *) (capsBlock + offset);
+        /* Keep the devcaps record with the highest type seen so far. */
+        if ((record->header.type >= SVGA3DCAPS_RECORD_DEVCAPS_MIN) &&
+            (record->header.type <= SVGA3DCAPS_RECORD_DEVCAPS_MAX) &&
+            (!capsRecord || (record->header.type > capsRecord->header.type))) {
+           capsRecord = record;
+        }
+      }
+
+      if(!capsRecord)
+        return -1;
+
+      /*
+       * Calculate the number of caps from the size of the record.
+       */
+      capArray = (const SVGA3dCapPair *) capsRecord->data;
+      numCaps = (int) ((capsRecord->header.length * sizeof(uint32) -
+                       sizeof capsRecord->header) / (2 * sizeof(uint32)));
+
+      for (i = 0; i < numCaps; i++) {
+        index = capArray[i][0];
+        if (index < vws->ioctl.num_cap_3d) {
+           vws->ioctl.cap_3d[index].has_cap = TRUE;
+           vws->ioctl.cap_3d[index].result.u = capArray[i][1];
+        } else {
+           debug_printf("Unknown devcaps seen: %d\n", index);
+        }
       }
    }
-   
    return 0;
 }
 
-
+/**
+ * Probe the vmwgfx kernel module and record what it supports.
+ *
+ * Reads the DRM driver version from vws->ioctl.drm_fd, derives the
+ * DRM-version feature gates from it, queries the device parameters
+ * (3D enable, FIFO hw version, hw caps, memory limits, VGPU10, SM4.1),
+ * allocates vws->ioctl.cap_3d and passes the raw 3D caps buffer fetched
+ * from the kernel to vmw_ioctl_parse_caps().
+ *
+ * Returns TRUE on success.  On failure everything allocated here is
+ * released, vws->ioctl.cap_3d is left NULL if it had been allocated,
+ * vws->ioctl.num_cap_3d is reset to 0 and FALSE is returned.
+ */
 boolean
 vmw_ioctl_init(struct vmw_winsys_screen *vws)
 {
    struct drm_vmw_getparam_arg gp_arg;
+   struct drm_vmw_get_3d_cap_arg cap_arg;
+   unsigned int size;
    int ret;
+   uint32_t *cap_buffer;
+   drmVersionPtr version;
+   boolean drm_gb_capable;
+   boolean have_drm_2_5;
+   boolean have_drm_2_10;
+   boolean have_drm_2_14;
 
    VMW_FUNC;
 
+   version = drmGetVersion(vws->ioctl.drm_fd);
+   if (!version)
+      goto out_no_version;
+
+   /* Each gate is true for DRM >= 2.N, including any later major version. */
+   have_drm_2_5 = version->version_major > 2 ||
+      (version->version_major == 2 && version->version_minor > 4);
+   vws->ioctl.have_drm_2_6 = version->version_major > 2 ||
+      (version->version_major == 2 && version->version_minor > 5);
+   vws->ioctl.have_drm_2_9 = version->version_major > 2 ||
+      (version->version_major == 2 && version->version_minor > 8);
+   have_drm_2_10 = version->version_major > 2 ||
+      (version->version_major == 2 && version->version_minor > 9);
+   have_drm_2_14 = version->version_major > 2 ||
+      (version->version_major == 2 && version->version_minor > 13);
+   vws->ioctl.have_drm_2_15 = version->version_major > 2 ||
+      (version->version_major == 2 && version->version_minor > 14);
+
+   vws->ioctl.drm_execbuf_version = vws->ioctl.have_drm_2_9 ? 2 : 1;
+
+   drm_gb_capable = have_drm_2_5;
+
    memset(&gp_arg, 0, sizeof(gp_arg));
    gp_arg.param = DRM_VMW_PARAM_3D;
    ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
                              &gp_arg, sizeof(gp_arg));
    if (ret || gp_arg.value == 0) {
-      debug_printf("No 3D enabled (%i, %s)\n", ret, strerror(-ret));
-      goto out_err1;
+      vmw_error("No 3D enabled (%i, %s).\n", ret, strerror(-ret));
+      goto out_no_3d;
    }
 
    memset(&gp_arg, 0, sizeof(gp_arg));
-   gp_arg.param = DRM_VMW_PARAM_FIFO_OFFSET;
+   gp_arg.param = DRM_VMW_PARAM_FIFO_HW_VERSION;
    ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
                              &gp_arg, sizeof(gp_arg));
+   if (ret) {
+      vmw_error("Failed to get fifo hw version (%i, %s).\n",
+                ret, strerror(-ret));
+      goto out_no_3d;
+   }
+   vws->ioctl.hwversion = gp_arg.value;
+
+   /* A failing HW_CAPS query is treated as "no guest-backed objects". */
+   memset(&gp_arg, 0, sizeof(gp_arg));
+   gp_arg.param = DRM_VMW_PARAM_HW_CAPS;
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                             &gp_arg, sizeof(gp_arg));
+   if (ret)
+      vws->base.have_gb_objects = FALSE;
+   else
+      vws->base.have_gb_objects =
+         !!(gp_arg.value & (uint64_t) SVGA_CAP_GBOBJECTS);
+
+   if (vws->base.have_gb_objects && !drm_gb_capable) {
+      vmw_error("Guest-backed objects need vmwgfx DRM 2.5 or newer.\n");
+      goto out_no_3d;
+   }
+
+   /* Start from a known state; the probes below only ever set TRUE. */
+   vws->base.have_vgpu10 = FALSE;
+   vws->base.have_sm4_1 = FALSE;
+   vws->base.have_intra_surface_copy = FALSE;
+   vws->base.have_generate_mipmap_cmd = FALSE;
+   vws->base.have_set_predication_cmd = FALSE;
+   vws->base.have_fence_fd = FALSE;
+
+   if (vws->base.have_gb_objects) {
+      memset(&gp_arg, 0, sizeof(gp_arg));
+      gp_arg.param = DRM_VMW_PARAM_MAX_MOB_MEMORY;
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                &gp_arg, sizeof(gp_arg));
+      if (ret) {
+         /* Just guess a large enough value. */
+         vws->ioctl.max_mob_memory = 256*1024*1024;
+      } else {
+         vws->ioctl.max_mob_memory = gp_arg.value;
+      }
+
+      memset(&gp_arg, 0, sizeof(gp_arg));
+      gp_arg.param = DRM_VMW_PARAM_MAX_MOB_SIZE;
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                &gp_arg, sizeof(gp_arg));
+
+      if (ret || gp_arg.value == 0) {
+         vws->ioctl.max_texture_size = VMW_MAX_DEFAULT_TEXTURE_SIZE;
+      } else {
+         vws->ioctl.max_texture_size = gp_arg.value;
+      }
+
+      /* Never early flush surfaces, mobs do accounting. */
+      vws->ioctl.max_surface_memory = -1;
+
+      if (vws->ioctl.have_drm_2_9) {
+         memset(&gp_arg, 0, sizeof(gp_arg));
+         gp_arg.param = DRM_VMW_PARAM_VGPU10;
+         ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                   &gp_arg, sizeof(gp_arg));
+         if (ret == 0 && gp_arg.value != 0) {
+            const char *vgpu10_val;
+
+            debug_printf("Have VGPU10 interface and hardware.\n");
+            vws->base.have_vgpu10 = TRUE;
+            /* Setting SVGA_VGPU10=0 in the environment opts out. */
+            vgpu10_val = getenv("SVGA_VGPU10");
+            if (vgpu10_val && strcmp(vgpu10_val, "0") == 0) {
+               debug_printf("Disabling VGPU10 interface.\n");
+               vws->base.have_vgpu10 = FALSE;
+            } else {
+               debug_printf("Enabling VGPU10 interface.\n");
+            }
+         }
+      }
+
+      /* CAPS2 and SM4.1 are only probed when VGPU10 ended up enabled. */
+      if (vws->ioctl.have_drm_2_15 && vws->base.have_vgpu10) {
+         memset(&gp_arg, 0, sizeof(gp_arg));
+         gp_arg.param = DRM_VMW_PARAM_HW_CAPS2;
+         ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                   &gp_arg, sizeof(gp_arg));
+         if (ret == 0 && gp_arg.value != 0) {
+            vws->base.have_intra_surface_copy = TRUE;
+         }
+
+         memset(&gp_arg, 0, sizeof(gp_arg));
+         gp_arg.param = DRM_VMW_PARAM_SM4_1;
+         ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                   &gp_arg, sizeof(gp_arg));
+         if (ret == 0 && gp_arg.value != 0) {
+            vws->base.have_sm4_1 = TRUE;
+         }
+      }
+
+      memset(&gp_arg, 0, sizeof(gp_arg));
+      gp_arg.param = DRM_VMW_PARAM_3D_CAPS_SIZE;
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                &gp_arg, sizeof(gp_arg));
+      if (ret)
+         size = SVGA_FIFO_3D_CAPS_SIZE * sizeof(uint32_t);
+      else
+         size = gp_arg.value;
+
+      /* Guest-backed objects are known to be present in this branch, so
+       * the cap count always follows the caps buffer size.
+       */
+      vws->ioctl.num_cap_3d = size / sizeof(uint32_t);
+   } else {
+      vws->ioctl.num_cap_3d = SVGA3D_DEVCAP_MAX;
+
+      memset(&gp_arg, 0, sizeof(gp_arg));
+      gp_arg.param = DRM_VMW_PARAM_MAX_SURF_MEMORY;
+      if (have_drm_2_5)
+         ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                   &gp_arg, sizeof(gp_arg));
+      if (!have_drm_2_5 || ret) {
+         /* Just guess a large enough value, around 800mb. */
+         vws->ioctl.max_surface_memory = 0x30000000;
+      } else {
+         vws->ioctl.max_surface_memory = gp_arg.value;
+      }
+
+      vws->ioctl.max_texture_size = VMW_MAX_DEFAULT_TEXTURE_SIZE;
+
+      size = SVGA_FIFO_3D_CAPS_SIZE * sizeof(uint32_t);
+   }
+
+   debug_printf("VGPU10 interface is %s.\n",
+                vws->base.have_vgpu10 ? "on" : "off");
+
+   /* Scratch buffer the kernel fills with the raw caps; freed before
+    * this function returns on every path.
+    */
+   cap_buffer = calloc(1, size);
+   if (!cap_buffer) {
+      debug_printf("Failed alloc fifo 3D caps buffer.\n");
+      goto out_no_3d;
+   }
+
+   vws->ioctl.cap_3d = calloc(vws->ioctl.num_cap_3d,
+                              sizeof(*vws->ioctl.cap_3d));
+   if (!vws->ioctl.cap_3d) {
+      debug_printf("Failed alloc fifo 3D caps buffer.\n");
+      goto out_no_caparray;
+   }
+
+   memset(&cap_arg, 0, sizeof(cap_arg));
+   cap_arg.buffer = (uint64_t) (unsigned long) (cap_buffer);
+   cap_arg.max_size = size;
+
+   /*
+    * This call must always be after DRM_VMW_PARAM_MAX_MOB_MEMORY and
+    * DRM_VMW_PARAM_SM4_1. This is because, based on these calls, kernel
+    * driver sends the supported cap.
+    */
+   ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_GET_3D_CAP,
+                         &cap_arg, sizeof(cap_arg));
 
    if (ret) {
-      debug_printf("GET_PARAM on %d returned %d: %s\n",
-                  vws->ioctl.drm_fd, ret, strerror(-ret));
-      goto out_err1;
+      debug_printf("Failed to get 3D capabilities"
+                   " (%i, %s).\n", ret, strerror(-ret));
+      goto out_no_caps;
+   }
+
+   ret = vmw_ioctl_parse_caps(vws, cap_buffer);
+   if (ret) {
+      debug_printf("Failed to parse 3D capabilities"
+                   " (%i, %s).\n", ret, strerror(-ret));
+      goto out_no_caps;
    }
 
-   vmw_printf("Offset to map is 0x%08llx\n",
-              (unsigned long long)gp_arg.value);
+   if (have_drm_2_10 && vws->base.have_vgpu10) {
 
-   vws->ioctl.fifo_map = vmw_ioctl_fifo_map(vws, gp_arg.value);
-   if (vws->ioctl.fifo_map == NULL)
-      goto out_err1;
+      /* Support for these commands didn't make it into vmwgfx kernel
+       * modules before 2.10.
+       */
+      vws->base.have_generate_mipmap_cmd = TRUE;
+      vws->base.have_set_predication_cmd = TRUE;
+   }
+
+   /* Gate on >= 2.14 the same way as every other version check, so a
+    * future major version bump does not silently lose fence fd support.
+    */
+   if (have_drm_2_14) {
+      vws->base.have_fence_fd = TRUE;
+   }
 
+   free(cap_buffer);
+   drmFreeVersion(version);
    vmw_printf("%s OK\n", __FUNCTION__);
    return TRUE;
-
- out_err1:
+  out_no_caps:
+   free(vws->ioctl.cap_3d);
+   /* Do not leave a dangling pointer behind for later users of vws. */
+   vws->ioctl.cap_3d = NULL;
+  out_no_caparray:
+   free(cap_buffer);
+  out_no_3d:
+   drmFreeVersion(version);
+  out_no_version:
+   vws->ioctl.num_cap_3d = 0;
    debug_printf("%s Failed\n", __FUNCTION__);
    return FALSE;
 }
@@ -524,6 +1198,4 @@ void
 vmw_ioctl_cleanup(struct vmw_winsys_screen *vws)
 {
    VMW_FUNC;
-
-   vmw_ioctl_fifo_unmap(vws, (void *)vws->ioctl.fifo_map);
 }