svga: Add a limit to the maximum surface size
[mesa.git] / src / gallium / drivers / svga / svga_resource_texture.c
index ff83c750aafa57085ecc7e2557301e4c9fd71335..75e27458e9c198995038a570fd3d8a2e72c8ff68 100644 (file)
  *
  **********************************************************/
 
-#include "svga_cmd.h"
+#include "svga3d_reg.h"
+#include "svga3d_surfacedefs.h"
 
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
-#include "util/u_inlines.h"
 #include "os/os_thread.h"
 #include "util/u_format.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_resource.h"
 
+#include "svga_cmd.h"
+#include "svga_format.h"
 #include "svga_screen.h"
 #include "svga_context.h"
 #include "svga_resource_texture.h"
 #define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
 
 
-static unsigned int
-svga_texture_is_referenced( struct pipe_context *pipe,
-                           struct pipe_resource *texture,
-                           unsigned face, unsigned level)
-{
-   struct svga_texture *tex = svga_texture(texture);
-   struct svga_screen *ss = svga_screen(pipe->screen);
-
-   /**
-    * The screen does not cache texture writes.
-    */
-
-   if (!tex->handle || ss->sws->surface_is_flushed(ss->sws, tex->handle))
-      return PIPE_UNREFERENCED;
-
-   /**
-    * sws->surface_is_flushed() does not distinguish between read references
-    * and write references. So assume a reference is both.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-
-
-/*
- * Helper function and arrays
- */
-
-SVGA3dSurfaceFormat
-svga_translate_format(enum pipe_format format)
-{
-   switch(format) {
-   
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      return SVGA3D_A8R8G8B8;
-   case PIPE_FORMAT_B8G8R8X8_UNORM:
-      return SVGA3D_X8R8G8B8;
-
-      /* Required for GL2.1:
-       */
-   case PIPE_FORMAT_B8G8R8A8_SRGB:
-      return SVGA3D_A8R8G8B8;
-
-   case PIPE_FORMAT_B5G6R5_UNORM:
-      return SVGA3D_R5G6B5;
-   case PIPE_FORMAT_B5G5R5A1_UNORM:
-      return SVGA3D_A1R5G5B5;
-   case PIPE_FORMAT_B4G4R4A4_UNORM:
-      return SVGA3D_A4R4G4B4;
-
-      
-   /* XXX: Doesn't seem to work properly.
-   case PIPE_FORMAT_Z32_UNORM:
-      return SVGA3D_Z_D32;
-    */
-   case PIPE_FORMAT_Z16_UNORM:
-      return SVGA3D_Z_D16;
-   case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
-      return SVGA3D_Z_D24S8;
-   case PIPE_FORMAT_X8Z24_UNORM:
-      return SVGA3D_Z_D24X8;
-
-   case PIPE_FORMAT_A8_UNORM:
-      return SVGA3D_ALPHA8;
-   case PIPE_FORMAT_L8_UNORM:
-      return SVGA3D_LUMINANCE8;
-
-   case PIPE_FORMAT_DXT1_RGB:
-   case PIPE_FORMAT_DXT1_RGBA:
-      return SVGA3D_DXT1;
-   case PIPE_FORMAT_DXT3_RGBA:
-      return SVGA3D_DXT3;
-   case PIPE_FORMAT_DXT5_RGBA:
-      return SVGA3D_DXT5;
-
-   default:
-      return SVGA3D_FORMAT_INVALID;
-   }
-}
-
-
-SVGA3dSurfaceFormat
-svga_translate_format_render(enum pipe_format format)
-{
-   switch(format) { 
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-   case PIPE_FORMAT_B8G8R8X8_UNORM:
-   case PIPE_FORMAT_B5G5R5A1_UNORM:
-   case PIPE_FORMAT_B4G4R4A4_UNORM:
-   case PIPE_FORMAT_B5G6R5_UNORM:
-   case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_Z32_UNORM:
-   case PIPE_FORMAT_Z16_UNORM:
-   case PIPE_FORMAT_L8_UNORM:
-      return svga_translate_format(format);
-
-#if 1
-   /* For on host conversion */
-   case PIPE_FORMAT_DXT1_RGB:
-      return SVGA3D_X8R8G8B8;
-   case PIPE_FORMAT_DXT1_RGBA:
-   case PIPE_FORMAT_DXT3_RGBA:
-   case PIPE_FORMAT_DXT5_RGBA:
-      return SVGA3D_A8R8G8B8;
-#endif
-
-   default:
-      return SVGA3D_FORMAT_INVALID;
-   }
-}
-
-
 static INLINE void
 svga_transfer_dma_band(struct svga_context *svga,
                        struct svga_transfer *st,
                        SVGA3dTransferType transfer,
-                       unsigned y, unsigned h, unsigned srcy)
+                       unsigned y, unsigned h, unsigned srcy,
+                       SVGA3dSurfaceDMAFlags flags)
 {
    struct svga_texture *texture = svga_texture(st->base.resource); 
    SVGA3dCopyBox box;
    enum pipe_error ret;
-   
-   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
-                transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", 
-                texture->handle,
-                st->base.sr.face,
-                st->base.box.x,
-                y,
-                st->base.box.z,
-                st->base.box.x + st->base.box.width,
-                y + h,
-                st->base.box.z + 1,
-                util_format_get_blocksize(texture->b.b.format) * 8 /
-                (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
-   
+   assert(!st->use_direct_map);
+
    box.x = st->base.box.x;
    box.y = y;
    box.z = st->base.box.z;
@@ -195,10 +75,30 @@ svga_transfer_dma_band(struct svga_context *svga,
    box.srcy = srcy;
    box.srcz = 0;
 
-   ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1);
+   if (st->base.resource->target == PIPE_TEXTURE_CUBE) {
+      st->face = st->base.box.z;
+      box.z = 0;
+   }
+   else
+      st->face = 0;
+
+   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
+                transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", 
+                texture->handle,
+                st->face,
+                st->base.box.x,
+                y,
+                box.z,
+                st->base.box.x + st->base.box.width,
+                y + h,
+                box.z + 1,
+                util_format_get_blocksize(texture->b.b.format) * 8 /
+                (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
+
+   ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
    if(ret != PIPE_OK) {
-      svga->swc->flush(svga->swc, NULL);
-      ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1);
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
       assert(ret == PIPE_OK);
    }
 }
@@ -207,22 +107,31 @@ svga_transfer_dma_band(struct svga_context *svga,
 static INLINE void
 svga_transfer_dma(struct svga_context *svga,
                   struct svga_transfer *st,
-                  SVGA3dTransferType transfer)
+                  SVGA3dTransferType transfer,
+                  SVGA3dSurfaceDMAFlags flags)
 {
    struct svga_texture *texture = svga_texture(st->base.resource); 
    struct svga_screen *screen = svga_screen(texture->b.b.screen);
    struct svga_winsys_screen *sws = screen->sws;
    struct pipe_fence_handle *fence = NULL;
-   
+
+   assert(!st->use_direct_map);
+
    if (transfer == SVGA3D_READ_HOST_VRAM) {
       SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__);
    }
 
+   /* Ensure any pending operations on host surfaces are queued on the command
+    * buffer first.
+    */
+   svga_surfaces_flush( svga );
 
    if(!st->swbuf) {
       /* Do the DMA transfer in a single go */
-      
-      svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0);
+
+      svga_transfer_dma_band(svga, st, transfer,
+                             st->base.box.y, st->base.box.height, 0,
+                             flags);
 
       if(transfer == SVGA3D_READ_HOST_VRAM) {
          svga_context_flush(svga, &fence);
@@ -231,7 +140,7 @@ svga_transfer_dma(struct svga_context *svga,
       }
    }
    else {
-      unsigned y, h, srcy;
+      int y, h, srcy;
       unsigned blockheight = util_format_get_blockheight(st->base.resource->format);
       h = st->hw_nblocksy * blockheight;
       srcy = 0;
@@ -245,31 +154,39 @@ svga_transfer_dma(struct svga_context *svga,
          /* Transfer band must be aligned to pixel block boundaries */
          assert(y % blockheight == 0);
          assert(h % blockheight == 0);
-         
+
          offset = y * st->base.stride / blockheight;
          length = h * st->base.stride / blockheight;
 
          sw = (uint8_t *)st->swbuf + offset;
-         
-         if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+
+         if (transfer == SVGA3D_WRITE_HOST_VRAM) {
+            unsigned usage = PIPE_TRANSFER_WRITE;
+
             /* Wait for the previous DMAs to complete */
             /* TODO: keep one DMA (at half the size) in the background */
-            if(y) {
-               svga_context_flush(svga, &fence);
-               sws->fence_finish(sws, fence, 0);
-               sws->fence_reference(sws, &fence, NULL);
+            if (y) {
+               svga_context_flush(svga, NULL);
+               usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
             }
 
-            hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_WRITE);
+            hw = sws->buffer_map(sws, st->hwbuf, usage);
             assert(hw);
-            if(hw) {
+            if (hw) {
                memcpy(hw, sw, length);
                sws->buffer_unmap(sws, st->hwbuf);
             }
          }
-         
-         svga_transfer_dma_band(svga, st, transfer, y, h, srcy);
-         
+
+         svga_transfer_dma_band(svga, st, transfer, y, h, srcy, flags);
+
+         /*
+          * Prevent the texture contents from being discarded on the next band
+          * upload.
+          */
+
+         flags.discard = FALSE;
+
          if(transfer == SVGA3D_READ_HOST_VRAM) {
             svga_context_flush(svga, &fence);
             sws->fence_finish(sws, fence, 0);
@@ -286,9 +203,6 @@ svga_transfer_dma(struct svga_context *svga,
 }
 
 
-
-
-
 static boolean 
 svga_texture_get_handle(struct pipe_screen *screen,
                                struct pipe_resource *texture,
@@ -310,7 +224,7 @@ svga_texture_destroy(struct pipe_screen *screen,
                     struct pipe_resource *pt)
 {
    struct svga_screen *ss = svga_screen(screen);
-   struct svga_texture *tex = (struct svga_texture *)pt;
+   struct svga_texture *tex = svga_texture(pt);
 
    ss->texture_timestamp++;
 
@@ -322,106 +236,308 @@ svga_texture_destroy(struct pipe_screen *screen,
    SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle);
    svga_screen_surface_destroy(ss, &tex->key, &tex->handle);
 
+   ss->total_resource_bytes -= tex->size;
+
+   FREE(tex->rendered_to);
    FREE(tex);
 }
 
 
+/**
+ * Determine if we need to read back a texture image before mapping it.
+ */
+static boolean
+need_tex_readback(struct pipe_transfer *transfer)
+{
+   struct svga_texture *t = svga_texture(transfer->resource);
+
+   if (transfer->usage & PIPE_TRANSFER_READ)
+      return TRUE;
+
+   if ((transfer->usage & PIPE_TRANSFER_WRITE) &&
+       ((transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) == 0)) {
+      unsigned face;
 
+      if (transfer->resource->target == PIPE_TEXTURE_CUBE) {
+         assert(transfer->box.depth == 1);
+         face = transfer->box.z;
+      }
+      else {
+         face = 0;
+      }
+      if (svga_was_texture_rendered_to(t, face, transfer->level)) {
+         return TRUE;
+      }
+   }
 
+   return FALSE;
+}
 
 
 
 /* XXX: Still implementing this as if it was a screen function, but
  * can now modify it to queue transfers on the context.
  */
-static struct pipe_transfer *
-svga_texture_get_transfer(struct pipe_context *pipe,
-                         struct pipe_resource *texture,
-                         struct pipe_subresource sr,
-                         unsigned usage,
-                         const struct pipe_box *box)
+static void *
+svga_texture_transfer_map(struct pipe_context *pipe,
+                          struct pipe_resource *texture,
+                          unsigned level,
+                          unsigned usage,
+                          const struct pipe_box *box,
+                          struct pipe_transfer **ptransfer)
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_screen *ss = svga_screen(pipe->screen);
    struct svga_winsys_screen *sws = ss->sws;
    struct svga_transfer *st;
-   unsigned nblocksx = util_format_get_nblocksx(texture->format, box->width);
-   unsigned nblocksy = util_format_get_nblocksy(texture->format, box->height);
-
-   /* We can't map texture storage directly */
-   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
-      return NULL;
+   unsigned nblocksx, nblocksy;
+   boolean use_direct_map = svga_have_gb_objects(svga) &&
+      !svga_have_gb_dma(svga);
+   unsigned d;
+
+   /* We can't map texture storage directly unless we have GB objects */
+   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
+      if (svga_have_gb_objects(svga))
+         use_direct_map = TRUE;
+      else
+         return NULL;
+   }
 
    st = CALLOC_STRUCT(svga_transfer);
    if (!st)
       return NULL;
-   
+
+   {
+      unsigned w, h;
+      if (use_direct_map) {
+         /* we'll directly access the guest-backed surface */
+         w = u_minify(texture->width0, level);
+         h = u_minify(texture->height0, level);
+         d = u_minify(texture->depth0, level);
+      }
+      else {
+         /* we'll put the data into a tightly packed buffer */
+         w = box->width;
+         h = box->height;
+         d = box->depth;
+      }
+      nblocksx = util_format_get_nblocksx(texture->format, w);
+      nblocksy = util_format_get_nblocksy(texture->format, h);
+   }
+
    pipe_resource_reference(&st->base.resource, texture);
-   st->base.sr = sr;
+   st->base.level = level;
    st->base.usage = usage;
    st->base.box = *box;
    st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
-   st->base.slice_stride = 0;
+   st->base.layer_stride = st->base.stride * nblocksy;
+
+   if (!use_direct_map) {
+      /* Use a DMA buffer */
+      st->hw_nblocksy = nblocksy;
 
-   st->hw_nblocksy = nblocksy;
-   
-   st->hwbuf = svga_winsys_buffer_create(svga,
-                                         1, 
-                                         0,
-                                         st->hw_nblocksy*st->base.stride);
-   while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
       st->hwbuf = svga_winsys_buffer_create(svga,
                                             1, 
                                             0,
-                                            st->hw_nblocksy*st->base.stride);
+                                            st->hw_nblocksy * st->base.stride * d);
+      while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
+         st->hwbuf = svga_winsys_buffer_create(svga,
+                                               1, 
+                                               0,
+                                               st->hw_nblocksy * st->base.stride * d);
+      }
+
+      if (!st->hwbuf) {
+         FREE(st);
+         return NULL;
+      }
+
+      if(st->hw_nblocksy < nblocksy) {
+         /* We couldn't allocate a hardware buffer big enough for the transfer, 
+          * so allocate regular malloc memory instead */
+         if (0) {
+            debug_printf("%s: failed to allocate %u KB of DMA, "
+                         "splitting into %u x %u KB DMA transfers\n",
+                         __FUNCTION__,
+                         (nblocksy*st->base.stride + 1023)/1024,
+                         (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
+                         (st->hw_nblocksy*st->base.stride + 1023)/1024);
+         }
+
+         st->swbuf = MALLOC(nblocksy * st->base.stride * d);
+         if (!st->swbuf) {
+            sws->buffer_destroy(sws, st->hwbuf);
+            FREE(st);
+            return NULL;
+         }
+      }
+
+      if (usage & PIPE_TRANSFER_READ) {
+         SVGA3dSurfaceDMAFlags flags;
+         memset(&flags, 0, sizeof flags);
+         svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM, flags);
+      }
+   } else {
+      struct pipe_transfer *transfer = &st->base;
+      struct svga_texture *tex = svga_texture(transfer->resource);
+      struct svga_winsys_surface *surf = tex->handle;
+      unsigned face;
+
+      assert(surf);
+
+      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+        face = transfer->box.z;
+      } else {
+        face = 0;
+      }
+
+      if (need_tex_readback(transfer)) {
+        SVGA3dBox box;
+        enum pipe_error ret;
+
+        box.x = transfer->box.x;
+        box.y = transfer->box.y;
+        box.w = transfer->box.width;
+        box.h = transfer->box.height;
+        box.d = transfer->box.depth;
+        if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+           box.z = 0;
+        }
+        else {
+           box.z = transfer->box.z;
+        }
+
+         (void) box;  /* not used at this time */
+
+         svga_surfaces_flush(svga);
+
+        ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
+
+        if (ret != PIPE_OK) {
+           svga_context_flush(svga, NULL);
+           ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
+           assert(ret == PIPE_OK);
+        }
+
+        svga_context_flush(svga, NULL);
+
+         /*
+          * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified
+          * we could potentially clear the flag for all faces/layers/mips.
+          */
+         svga_clear_texture_rendered_to(tex, face, transfer->level);
+      }
+      else {
+        assert(transfer->usage & PIPE_TRANSFER_WRITE);
+        if ((transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) == 0) {
+            svga_surfaces_flush(svga);
+            if (!sws->surface_is_flushed(sws, surf))
+               svga_context_flush(svga, NULL);
+        }
+      }
    }
 
-   if(!st->hwbuf)
-      goto no_hwbuf;
-
-   if(st->hw_nblocksy < nblocksy) {
-      /* We couldn't allocate a hardware buffer big enough for the transfer, 
-       * so allocate regular malloc memory instead */
-      debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n",
-                   __FUNCTION__,
-                   (nblocksy*st->base.stride + 1023)/1024,
-                   (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
-                   (st->hw_nblocksy*st->base.stride + 1023)/1024);
-      st->swbuf = MALLOC(nblocksy*st->base.stride);
-      if(!st->swbuf)
-         goto no_swbuf;
+   st->use_direct_map = use_direct_map;
+
+   *ptransfer = &st->base;
+
+   /*
+    * Begin mapping code
+    */
+   if (st->swbuf) {
+      return st->swbuf;
    }
-   
-   if (usage & PIPE_TRANSFER_READ)
-      svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM);
+   else if (!st->use_direct_map) {
+      return sws->buffer_map(sws, st->hwbuf, usage);
+   }
+   else {
+      struct svga_screen *screen = svga_screen(svga->pipe.screen);
+      SVGA3dSurfaceFormat format;
+      SVGA3dSize baseLevelSize;
+      struct svga_texture *tex = svga_texture(texture);
+      struct svga_winsys_surface *surf = tex->handle;
+      uint8_t *map;
+      boolean retry;
+      unsigned face, offset, mip_width, mip_height;
+      unsigned xoffset = box->x;
+      unsigned yoffset = box->y;
+      unsigned zoffset = box->z;
+
+      map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
+      if (map == NULL && retry) {
+         /*
+          * At this point, svga_surfaces_flush() should already have been
+          * called earlier in svga_texture_transfer_map().
+          */
+         svga_context_flush(svga, NULL);
+         map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
+      }
 
-   return &st->base;
+      /*
+       * Make sure we return NULL if the map fails
+       */
+      if (map == NULL) {
+         FREE(st);
+         return map;
+      }
 
-no_swbuf:
-   sws->buffer_destroy(sws, st->hwbuf);
-no_hwbuf:
-   FREE(st);
-   return NULL;
+      /**
+       * Compute the offset to the specific texture slice in the buffer.
+       */
+      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+         face = zoffset;
+         zoffset = 0;
+      } else {
+         face = 0;
+      }
+
+      format = svga_translate_format(screen, tex->b.b.format, 0);
+      baseLevelSize.width = tex->b.b.width0;
+      baseLevelSize.height = tex->b.b.height0;
+      baseLevelSize.depth = tex->b.b.depth0;
+
+      offset = svga3dsurface_get_image_offset(format, baseLevelSize,
+                                              tex->b.b.last_level + 1, /* numMips */
+                                              face, level);
+      if (level > 0) {
+         assert(offset > 0);
+      }
+
+      mip_width = u_minify(tex->b.b.width0, level);
+      mip_height = u_minify(tex->b.b.height0, level);
+
+      offset += svga3dsurface_get_pixel_offset(format, mip_width, mip_height,
+                                               xoffset, yoffset, zoffset);
+
+      return (void *) (map + offset);
+   }
 }
 
 
-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
+/**
+ * Unmap a GB texture surface.
  */
-static void *
-svga_texture_transfer_map( struct pipe_context *pipe,
-                          struct pipe_transfer *transfer )
+static void
+svga_texture_surface_unmap(struct svga_context *svga,
+                           struct pipe_transfer *transfer)
 {
-   struct svga_screen *ss = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = ss->sws;
-   struct svga_transfer *st = svga_transfer(transfer);
-
-   if(st->swbuf)
-      return st->swbuf;
-   else
-      /* The wait for read transfers already happened when svga_transfer_dma
-       * was called. */
-      return sws->buffer_map(sws, st->hwbuf, transfer->usage);
+   struct svga_winsys_surface *surf = svga_texture(transfer->resource)->handle;
+   struct svga_winsys_context *swc = svga->swc;
+   boolean rebind;
+
+   assert(surf);
+
+   swc->surface_unmap(swc, surf, &rebind);
+   if (rebind) {
+      enum pipe_error ret;
+      ret = SVGA3D_BindGBSurface(swc, surf);
+      if (ret != PIPE_OK) {
+         /* flush and retry */
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_BindGBSurface(swc, surf);
+         assert(ret == PIPE_OK);
+      }
+   }
 }
 
 
@@ -432,49 +548,97 @@ static void
 svga_texture_transfer_unmap(struct pipe_context *pipe,
                            struct pipe_transfer *transfer)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_screen *ss = svga_screen(pipe->screen);
    struct svga_winsys_screen *sws = ss->sws;
    struct svga_transfer *st = svga_transfer(transfer);
-   
-   if(!st->swbuf)
-      sws->buffer_unmap(sws, st->hwbuf);
-}
+   struct svga_texture *tex = svga_texture(transfer->resource);
 
+   if (!st->swbuf) {
+      if (st->use_direct_map) {
+         svga_texture_surface_unmap(svga, transfer);
+      }
+      else {
+         sws->buffer_unmap(sws, st->hwbuf);
+      }
+   }
 
-static void
-svga_texture_transfer_destroy(struct pipe_context *pipe,
-                             struct pipe_transfer *transfer)
-{
-   struct svga_context *svga = svga_context(pipe);
-   struct svga_texture *tex = svga_texture(transfer->resource);
-   struct svga_screen *ss = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = ss->sws;
-   struct svga_transfer *st = svga_transfer(transfer);
+   if (!st->use_direct_map && (st->base.usage & PIPE_TRANSFER_WRITE)) {
+      /* Use DMA to transfer texture data */
+      SVGA3dSurfaceDMAFlags flags;
 
-   if (st->base.usage & PIPE_TRANSFER_WRITE) {
-      svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM);
-      ss->texture_timestamp++;
-      tex->view_age[transfer->sr.level] = ++(tex->age);
-      tex->defined[transfer->sr.face][transfer->sr.level] = TRUE;
+      memset(&flags, 0, sizeof flags);
+      if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+         flags.discard = TRUE;
+      }
+      if (transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
+         flags.unsynchronized = TRUE;
+      }
+
+      svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags);
+   } else if (transfer->usage & PIPE_TRANSFER_WRITE) {
+      struct svga_winsys_surface *surf =
+        svga_texture(transfer->resource)->handle;
+      unsigned face;
+      SVGA3dBox box;
+      enum pipe_error ret;
+
+      assert(svga_have_gb_objects(svga));
+
+      /* update the affected region */
+      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+        face = transfer->box.z;
+      } else {
+        face = 0;
+      }
+
+      box.x = transfer->box.x;
+      box.y = transfer->box.y;
+      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+         box.z = 0;
+      }
+      else {
+         box.z = transfer->box.z;
+      }
+      box.w = transfer->box.width;
+      box.h = transfer->box.height;
+      box.d = transfer->box.depth;
+
+      if (0)
+         debug_printf("%s %d, %d, %d  %d x %d x %d\n",
+                      __FUNCTION__,
+                      box.x, box.y, box.z,
+                      box.w, box.h, box.d);
+
+      ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
+         assert(ret == PIPE_OK);
+      }
    }
 
+   ss->texture_timestamp++;
+   svga_age_texture_view(tex, transfer->level);
+   if (transfer->resource->target == PIPE_TEXTURE_CUBE)
+      svga_define_texture_level(tex, transfer->box.z, transfer->level);
+   else
+      svga_define_texture_level(tex, 0, transfer->level);
+
    pipe_resource_reference(&st->base.resource, NULL);
+
    FREE(st->swbuf);
-   sws->buffer_destroy(sws, st->hwbuf);
+   if (!st->use_direct_map) {
+      sws->buffer_destroy(sws, st->hwbuf);
+   }
    FREE(st);
 }
 
 
-
-
-
 struct u_resource_vtbl svga_texture_vtbl = 
 {
    svga_texture_get_handle,          /* get_handle */
    svga_texture_destroy,             /* resource_destroy */
-   svga_texture_is_referenced,       /* is_resource_referenced */
-   svga_texture_get_transfer,        /* get_transfer */
-   svga_texture_transfer_destroy,     /* transfer_destroy */
    svga_texture_transfer_map,        /* transfer_map */
    u_default_transfer_flush_region,   /* transfer_flush_region */
    svga_texture_transfer_unmap,              /* transfer_unmap */
@@ -482,15 +646,13 @@ struct u_resource_vtbl svga_texture_vtbl =
 };
 
 
-
-
 struct pipe_resource *
 svga_texture_create(struct pipe_screen *screen,
                     const struct pipe_resource *template)
 {
    struct svga_screen *svgascreen = svga_screen(screen);
    struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
-   
+
    if (!tex)
       goto error1;
 
@@ -507,7 +669,7 @@ svga_texture_create(struct pipe_screen *screen,
    tex->key.size.width = template->width0;
    tex->key.size.height = template->height0;
    tex->key.size.depth = template->depth0;
-   
+
    if(template->target == PIPE_TEXTURE_CUBE) {
       tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
       tex->key.numFaces = 6;
@@ -516,6 +678,10 @@ svga_texture_create(struct pipe_screen *screen,
       tex->key.numFaces = 1;
    }
 
+   if (template->target == PIPE_TEXTURE_3D) {
+      tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+   }
+
    tex->key.cachable = 1;
 
    if (template->bind & PIPE_BIND_SAMPLER_VIEW)
@@ -529,48 +695,64 @@ svga_texture_create(struct pipe_screen *screen,
       tex->key.cachable = 0;
    }
 
-   if (template->bind & PIPE_BIND_SCANOUT) {
+   if (template->bind & (PIPE_BIND_SCANOUT |
+                         PIPE_BIND_CURSOR)) {
       tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT;
       tex->key.cachable = 0;
    }
-   
+
    /* 
-    * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
+    * Note: Previously we never passed the
+    * SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
     * know beforehand whether a texture will be used as a rendertarget or not
     * and it always requests PIPE_BIND_RENDER_TARGET, therefore
     * passing the SVGA3D_SURFACE_HINT_RENDERTARGET here defeats its purpose.
+    *
+    * However, this was changed since other state trackers
+    * (XA for example) use it accurately and certain device versions
+    * rely on it in certain situations to render correctly.
     */
-#if 0
    if((template->bind & PIPE_BIND_RENDER_TARGET) &&
       !util_format_is_s3tc(template->format))
       tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
-#endif
    
    if(template->bind & PIPE_BIND_DEPTH_STENCIL)
       tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
    
    tex->key.numMipLevels = template->last_level + 1;
    
-   tex->key.format = svga_translate_format(template->format);
+   tex->key.format = svga_translate_format(svgascreen, template->format, template->bind);
    if(tex->key.format == SVGA3D_FORMAT_INVALID)
       goto error2;
 
    SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle);
    tex->handle = svga_screen_surface_create(svgascreen, &tex->key);
-   if (tex->handle)
-      SVGA_DBG(DEBUG_DMA, "  --> got sid %p (texture)\n", tex->handle);
+   if (!tex->handle)
+       goto error2;
+
+   SVGA_DBG(DEBUG_DMA, "  --> got sid %p (texture)\n", tex->handle);
+
+   debug_reference(&tex->b.b.reference,
+                   (debug_reference_descriptor)debug_describe_resource, 0);
+
+   tex->size = util_resource_size(template);
+   svgascreen->total_resource_bytes += tex->size;
+
+   tex->rendered_to = CALLOC(template->depth0 * template->array_size,
+                             sizeof(tex->rendered_to[0]));
+   if (!tex->rendered_to)
+      goto error2;
 
    return &tex->b.b;
 
 error2:
+   FREE(tex->rendered_to);
    FREE(tex);
 error1:
    return NULL;
 }
 
 
-
-
 struct pipe_resource *
 svga_texture_from_handle(struct pipe_screen *screen,
                         const struct pipe_resource *template,
@@ -583,7 +765,8 @@ svga_texture_from_handle(struct pipe_screen *screen,
    assert(screen);
 
    /* Only supports one type */
-   if (template->target != PIPE_TEXTURE_2D ||
+   if ((template->target != PIPE_TEXTURE_2D &&
+       template->target != PIPE_TEXTURE_RECT) ||
        template->last_level != 0 ||
        template->depth0 != 1) {
       return NULL;
@@ -594,14 +777,15 @@ svga_texture_from_handle(struct pipe_screen *screen,
    if (!srf)
       return NULL;
 
-   if (svga_translate_format(template->format) != format) {
-      unsigned f1 = svga_translate_format(template->format);
+   if (svga_translate_format(svga_screen(screen), template->format, template->bind) != format) {
+      unsigned f1 = svga_translate_format(svga_screen(screen), template->format, template->bind);
       unsigned f2 = format;
 
       /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */
       if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
               (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) ||
-              (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) {
+              (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ||
+              (f1 == SVGA3D_Z_DF24 && f2 == SVGA3D_Z_D24S8_INT) ) ) {
          debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2);
          return NULL;
       }
@@ -616,19 +800,12 @@ svga_texture_from_handle(struct pipe_screen *screen,
    pipe_reference_init(&tex->b.b.reference, 1);
    tex->b.b.screen = screen;
 
-   if (format == SVGA3D_X8R8G8B8)
-      tex->b.b.format = PIPE_FORMAT_B8G8R8X8_UNORM;
-   else if (format == SVGA3D_A8R8G8B8)
-      tex->b.b.format = PIPE_FORMAT_B8G8R8A8_UNORM;
-   else {
-      /* ?? */
-   }
-
    SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf);
 
    tex->key.cachable = 0;
    tex->handle = srf;
 
+   tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0]));
+
    return &tex->b.b;
 }
-