+static bool
+intel_texsubimage_blorp(struct brw_context *brw, GLuint dims,
+                        struct gl_texture_image *tex_image,
+                        unsigned x, unsigned y, unsigned z,
+                        unsigned width, unsigned height, unsigned depth,
+                        GLenum format, GLenum type, const void *pixels,
+                        const struct gl_pixelstore_attrib *packing)
+{
+   struct intel_texture_image *intel_image = intel_texture_image(tex_image);
+   const struct gl_texture_object *tex_obj = tex_image->TexObject;
+
+   /* The blorp path can't understand crazy format hackery */
+   if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) !=
+       _mesa_get_format_base_format(tex_image->TexFormat))
+      return false;
+
+   /* Remap level/layer through the texture object's MinLevel/MinLayer so
+    * uploads land in the right slice of the underlying miptree.
+    */
+   const unsigned mt_level = tex_image->Level + tex_obj->MinLevel;
+   const unsigned mt_z = tex_obj->MinLayer + tex_image->Face + z;
+
+   return brw_blorp_upload_miptree(brw, intel_image->mt, tex_image->TexFormat,
+                                   mt_level, x, y, mt_z, width, height, depth,
+                                   tex_obj->Target, format, type,
+                                   pixels, packing);
+}
+
+/**
+ * \brief A fast path for glTexImage and glTexSubImage.
+ *
+ * This fast path is taken when the texture format is BGRA, RGBA,
+ * A or L and when the texture memory is X- or Y-tiled. It uploads
+ * the texture data by mapping the texture memory without a GTT fence, thus
+ * acquiring a tiled view of the memory, and then copying successive
+ * spans within each tile.
+ *
+ * This is a performance win over the conventional texture upload path because
+ * it avoids the performance penalty of writing through the write-combine
+ * buffer. In the conventional texture upload path,
+ * texstore.c:store_texsubimage(), the texture memory is mapped through a GTT
+ * fence, thus acquiring a linear view of the memory, then each row in the
+ * image is memcpy'd. In this fast path, we replace each row's copy with
+ * a sequence of copies over each linear span in tile.
+ *
+ * One use case is Google Chrome's paint rectangles. Chrome (as
+ * of version 21) renders each page as a tiling of 256x256 GL_BGRA textures.
+ * Each page's content is initially uploaded with glTexImage2D and damaged
+ * regions are updated with glTexSubImage2D. On some workloads, the
+ * performance gain of this fastpath on Sandybridge is over 5x.
+ */
+static bool
+intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
+ GLuint dims,
+ struct gl_texture_image *texImage,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type,
+ const GLvoid *pixels,
+ const struct gl_pixelstore_attrib *packing)
+{
+ struct brw_context *brw = brw_context(ctx);
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ struct intel_texture_image *image = intel_texture_image(texImage);
+ int src_pitch;
+
+ /* The miptree's buffer. */
+ struct brw_bo *bo;
+
+ uint32_t cpp;
+ isl_memcpy_type copy_type;
+
+ /* This fastpath is restricted to specific texture types:
+ * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
+ * more types.
+ *
+ * FINISHME: The restrictions below on packing alignment and packing row
+ * length are likely unneeded now because we calculate the source stride
+ * with _mesa_image_row_stride. However, before removing the restrictions
+ * we need tests.
+ */
+ if (!devinfo->has_llc ||
+ !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+ !(texImage->TexObject->Target == GL_TEXTURE_2D ||
+ texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
+ pixels == NULL ||
+ _mesa_is_bufferobj(packing->BufferObj) ||
+ packing->Alignment > 4 ||
+ packing->SkipPixels > 0 ||
+ packing->SkipRows > 0 ||
+ (packing->RowLength != 0 && packing->RowLength != width) ||
+ packing->SwapBytes ||
+ packing->LsbFirst ||
+ packing->Invert)
+ return false;
+
+ /* Only a simple blit, no scale, bias or other mapping. */
+ if (ctx->_ImageTransferState)
+ return false;
+
+ /* Pick the memcpy variant (and bytes-per-pixel) for this source/dest
+ * format pair; an unsupported combination falls back to another path.
+ */
+ copy_type = intel_miptree_get_memcpy_type(texImage->TexFormat, format, type,
+ &cpp);
+ if (copy_type == ISL_MEMCPY_INVALID)
+ return false;
+
+ /* If this is a nontrivial texture view, let another path handle it instead. */
+ if (texImage->TexObject->MinLayer)
+ return false;
+
+ if (!image->mt ||
+ (image->mt->surf.tiling != ISL_TILING_X &&
+ image->mt->surf.tiling != ISL_TILING_Y0)) {
+ /* The algorithm is written only for X- or Y-tiled memory. */
+ return false;
+ }
+
+ /* linear_to_tiled() assumes that if the object is swizzled, it is using
+ * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only
+ * true on gen5 and above.
+ *
+ * The killer on top is that some gen4 have an L-shaped swizzle mode, where
+ * parts of the memory aren't swizzled at all. Userspace just can't handle
+ * that.
+ */
+ if (devinfo->gen < 5 && brw->has_swizzling)
+ return false;
+
+ /* Translate the image's level into the miptree's level space (texture
+ * views may bias it by MinLevel).
+ */
+ int level = texImage->Level + texImage->TexObject->MinLevel;
+
+ /* Since we are going to write raw data to the miptree, we need to resolve
+ * any pending fast color clears before we start.
+ */
+ assert(image->mt->surf.logical_level0_px.depth == 1);
+ assert(image->mt->surf.logical_level0_px.array_len == 1);
+
+ intel_miptree_access_raw(brw, image->mt, level, 0, true);
+
+ bo = image->mt->bo;
+
+ /* If the batch still references this BO, flush so the CPU write below
+ * can't race queued GPU work.
+ */
+ if (brw_batch_references(&brw->batch, bo)) {
+ perf_debug("Flushing before mapping a referenced bo.\n");
+ intel_batchbuffer_flush(brw);
+ }
+
+ /* MAP_RAW: map the tiled memory directly (no GTT fence detiling). */
+ void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_RAW);
+ if (map == NULL) {
+ DBG("%s: failed to map bo\n", __func__);
+ return false;
+ }
+
+ src_pitch = _mesa_image_row_stride(packing, width, format, type);
+
+ /* We postponed printing this message until having committed to executing
+ * the function.
+ */
+ DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
+ "mesa_format=0x%x tiling=%d "
+ "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d) ",
+ __func__, texImage->Level, xoffset, yoffset, width, height,
+ format, type, texImage->TexFormat, image->mt->surf.tiling,
+ packing->Alignment, packing->RowLength, packing->SkipPixels,
+ packing->SkipRows);
+
+ /* Adjust x and y offset based on miplevel */
+ unsigned level_x, level_y;
+ intel_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y);
+ xoffset += level_x;
+ yoffset += level_y;
+
+ /* x range is in bytes, y range in rows. */
+ isl_memcpy_linear_to_tiled(
+ xoffset * cpp, (xoffset + width) * cpp,
+ yoffset, yoffset + height,
+ map,
+ pixels,
+ image->mt->surf.row_pitch_B, src_pitch,
+ brw->has_swizzling,
+ image->mt->surf.tiling,
+ copy_type
+ );
+
+ brw_bo_unmap(bo);
+ return true;
+}
+
+
+/**
+ * Top-level texture upload dispatcher: try the blorp path, then the
+ * tiled-memcpy fastpath, and finally fall back to the generic Mesa
+ * software store.
+ */
+static void
+intel_upload_tex(struct gl_context * ctx,
+                 GLuint dims,
+                 struct gl_texture_image *texImage,
+                 GLint xoffset, GLint yoffset, GLint zoffset,
+                 GLsizei width, GLsizei height, GLsizei depth,
+                 GLenum format, GLenum type,
+                 const GLvoid * pixels,
+                 const struct gl_pixelstore_attrib *packing)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_mipmap_tree *mt = intel_texture_image(texImage)->mt;
+   bool ok;
+
+   /* Check that there is actually data to store. */
+   if (pixels == NULL && !_mesa_is_bufferobj(packing->BufferObj))
+      return;
+
+   /* The image may have no miptree at all; every use of mt below must be
+    * NULL-guarded.
+    */
+   bool tex_busy = mt && brw_bo_busy(mt->bo);
+
+   /* Prefer blorp when reading from a PBO, when the destination BO is busy
+    * (avoids stalling on a CPU map), or when the miptree uses CCS_E
+    * compression.
+    *
+    * The aux_usage check is guarded with "mt &&": previously mt was
+    * dereferenced unconditionally here, crashing when the image had no
+    * miptree (tex_busy above already guards the same way).
+    */
+   if (_mesa_is_bufferobj(packing->BufferObj) || tex_busy ||
+       (mt && mt->aux_usage == ISL_AUX_USAGE_CCS_E)) {
+      ok = intel_texsubimage_blorp(brw, dims, texImage,
+                                   xoffset, yoffset, zoffset,
+                                   width, height, depth, format, type,
+                                   pixels, packing);
+      if (ok)
+         return;
+   }
+
+   /* CPU fastpath: copy linear spans straight into the tiled miptree. */
+   ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage,
+                                       xoffset, yoffset, zoffset,
+                                       width, height, depth,
+                                       format, type, pixels, packing);
+   if (ok)
+      return;
+
+   /* Generic software fallback. */
+   _mesa_store_texsubimage(ctx, dims, texImage,
+                           xoffset, yoffset, zoffset,
+                           width, height, depth,
+                           format, type, pixels, packing);
+}
+
+