i965: Allocate tile aligned height
[mesa.git] / src / mesa / drivers / dri / i965 / intel_screen.c
index 1e752f40f0dcecb435878166b3f05acaebcf4a14..96f30168127acac0909714a21bd179c1b30d7376 100644 (file)
@@ -23,6 +23,7 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <drm_fourcc.h>
 #include <errno.h>
 #include <time.h>
 #include <unistd.h>
 #include "utils.h"
 #include "xmlpool.h"
 
+#ifndef DRM_FORMAT_MOD_INVALID
+#define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1)
+#endif
+
+#ifndef DRM_FORMAT_MOD_LINEAR
+#define DRM_FORMAT_MOD_LINEAR 0
+#endif
+
 static const __DRIconfigOptionsExtension brw_config_options = {
    .base = { __DRI_CONFIG_OPTIONS, 1 },
    .xml =
@@ -56,10 +65,6 @@ DRI_CONF_BEGIN
            DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
         DRI_CONF_DESC_END
       DRI_CONF_OPT_END
-
-      DRI_CONF_OPT_BEGIN_B(hiz, "true")
-        DRI_CONF_DESC(en, "Enable Hierarchical Z on gen6+")
-      DRI_CONF_OPT_END
    DRI_CONF_SECTION_END
 
    DRI_CONF_SECTION_QUALITY
@@ -84,7 +89,9 @@ DRI_CONF_BEGIN
       DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
       DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION("false")
       DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false")
+      DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false")
       DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
+      DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
 
       DRI_CONF_OPT_BEGIN_B(shader_precompile, "true")
         DRI_CONF_DESC(en, "Perform code generation at shader link time.")
@@ -99,7 +106,7 @@ DRI_CONF_END
 
 #include "intel_batchbuffer.h"
 #include "intel_buffers.h"
-#include "intel_bufmgr.h"
+#include "brw_bufmgr.h"
 #include "intel_fbo.h"
 #include "intel_mipmap_tree.h"
 #include "intel_screen.h"
@@ -123,39 +130,6 @@ get_time(void)
    return tp.tv_sec + tp.tv_nsec / 1000000000.0;
 }
 
-void
-aub_dump_bmp(struct gl_context *ctx)
-{
-   struct gl_framebuffer *fb = ctx->DrawBuffer;
-
-   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
-      struct intel_renderbuffer *irb =
-        intel_renderbuffer(fb->_ColorDrawBuffers[i]);
-
-      if (irb && irb->mt) {
-        enum aub_dump_bmp_format format;
-
-        switch (irb->Base.Base.Format) {
-        case MESA_FORMAT_B8G8R8A8_UNORM:
-        case MESA_FORMAT_B8G8R8X8_UNORM:
-           format = AUB_DUMP_BMP_FORMAT_ARGB_8888;
-           break;
-        default:
-           continue;
-        }
-
-         drm_intel_gem_bo_aub_dump_bmp(irb->mt->bo,
-                                      irb->draw_x,
-                                      irb->draw_y,
-                                      irb->Base.Base.Width,
-                                      irb->Base.Base.Height,
-                                      format,
-                                      irb->mt->pitch,
-                                      0);
-      }
-   }
-}
-
 static const __DRItexBufferExtension intelTexBufferExtension = {
    .base = { __DRI_TEX_BUFFER, 3 },
 
@@ -188,10 +162,6 @@ intel_dri2_flush_with_flags(__DRIcontext *cPriv,
       brw->need_flush_throttle = true;
 
    intel_batchbuffer_flush(brw);
-
-   if (INTEL_DEBUG & DEBUG_AUB) {
-      aub_dump_bmp(ctx);
-   }
 }
 
 /**
@@ -321,11 +291,63 @@ static struct intel_image_format intel_image_formats[] = {
        { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }
 };
 
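+/* Map each DRM format modifier to the i915 tiling mode it implies and to the
+ * vertical alignment (in rows) of that tiling layout: X tiles are 8 rows
+ * tall, Y tiles are 32 rows tall, and linear buffers need no padding.
+ */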
+static const struct {
+   uint32_t tiling;
+   uint64_t modifier;
+   unsigned height_align;
+} tiling_modifier_map[] = {
+   { .tiling = I915_TILING_NONE, .modifier = DRM_FORMAT_MOD_LINEAR,
+     .height_align = 1 },
+   { .tiling = I915_TILING_X, .modifier = I915_FORMAT_MOD_X_TILED,
+     .height_align = 8 },
+   { .tiling = I915_TILING_Y, .modifier = I915_FORMAT_MOD_Y_TILED,
+     .height_align = 32 },
+};
+
+static uint32_t
+modifier_to_tiling(uint64_t modifier)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(tiling_modifier_map); i++) {
+      if (tiling_modifier_map[i].modifier == modifier)
+         return tiling_modifier_map[i].tiling;
+   }
+
+   unreachable("modifier_to_tiling should only receive known modifiers");
+}
+
+static uint64_t
+tiling_to_modifier(uint32_t tiling)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(tiling_modifier_map); i++) {
+      if (tiling_modifier_map[i].tiling == tiling)
+         return tiling_modifier_map[i].modifier;
+   }
+
+   unreachable("tiling_to_modifier received unknown tiling mode");
+}
+
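+/* Round the buffer height up to a whole number of tile rows for the given
+ * modifier, so that allocations cover complete tiles.
+ */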
+static unsigned
+get_tiled_height(uint64_t modifier, unsigned height)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(tiling_modifier_map); i++) {
+      if (tiling_modifier_map[i].modifier == modifier)
+         return ALIGN(height, tiling_modifier_map[i].height_align);
+   }
+
+   unreachable("get_tiled_height received an unknown modifier");
+}
+
 static void
 intel_image_warn_if_unaligned(__DRIimage *image, const char *func)
 {
    uint32_t tiling, swizzle;
-   drm_intel_bo_get_tiling(image->bo, &tiling, &swizzle);
+   brw_bo_get_tiling(image->bo, &tiling, &swizzle);
 
    if (tiling != I915_TILING_NONE && (image->offset & 0xfff)) {
       _mesa_warning(NULL, "%s: offset 0x%08x not on tile boundary",
@@ -360,7 +382,8 @@ static boolean intel_lookup_fourcc(int dri_format, int *fourcc)
 }
 
 static __DRIimage *
-intel_allocate_image(int dri_format, void *loaderPrivate)
+intel_allocate_image(struct intel_screen *screen, int dri_format,
+                     void *loaderPrivate)
 {
     __DRIimage *image;
 
@@ -368,6 +391,7 @@ intel_allocate_image(int dri_format, void *loaderPrivate)
     if (image == NULL)
        return NULL;
 
+    image->screen = screen;
     image->dri_format = dri_format;
     image->offset = 0;
 
@@ -404,9 +428,9 @@ intel_setup_image_from_mipmap_tree(struct brw_context *brw, __DRIimage *image,
                                                   &image->tile_x,
                                                   &image->tile_y);
 
-   drm_intel_bo_unreference(image->bo);
+   brw_bo_unreference(image->bo);
    image->bo = mt->bo;
-   drm_intel_bo_reference(mt->bo);
+   brw_bo_reference(mt->bo);
 }
 
 static __DRIimage *
@@ -418,7 +442,7 @@ intel_create_image_from_name(__DRIscreen *dri_screen,
     __DRIimage *image;
     int cpp;
 
-    image = intel_allocate_image(format, loaderPrivate);
+    image = intel_allocate_image(screen, format, loaderPrivate);
     if (image == NULL)
        return NULL;
 
@@ -430,12 +454,13 @@ intel_create_image_from_name(__DRIscreen *dri_screen,
     image->width = width;
     image->height = height;
     image->pitch = pitch * cpp;
-    image->bo = drm_intel_bo_gem_create_from_name(screen->bufmgr, "image",
+    image->bo = brw_bo_gem_create_from_name(screen->bufmgr, "image",
                                                   name);
     if (!image->bo) {
        free(image);
        return NULL;
     }
+    image->modifier = tiling_to_modifier(image->bo->tiling_mode);
 
     return image;
 }
@@ -464,11 +489,12 @@ intel_create_image_from_renderbuffer(__DRIcontext *context,
 
    image->internal_format = rb->InternalFormat;
    image->format = rb->Format;
+   image->modifier = tiling_to_modifier(irb->mt->tiling);
    image->offset = 0;
    image->data = loaderPrivate;
-   drm_intel_bo_unreference(image->bo);
+   brw_bo_unreference(image->bo);
    image->bo = irb->mt->bo;
-   drm_intel_bo_reference(irb->mt->bo);
+   brw_bo_reference(irb->mt->bo);
    image->width = rb->Width;
    image->height = rb->Height;
    image->pitch = irb->mt->pitch;
@@ -525,6 +551,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target,
 
    image->internal_format = obj->Image[face][level]->InternalFormat;
    image->format = obj->Image[face][level]->TexFormat;
+   image->modifier = tiling_to_modifier(iobj->mt->tiling);
    image->data = loaderPrivate;
    intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
    image->dri_format = driGLFormatToImageFormat(image->format);
@@ -542,51 +569,137 @@ intel_create_image_from_texture(__DRIcontext *context, int target,
 static void
 intel_destroy_image(__DRIimage *image)
 {
-   drm_intel_bo_unreference(image->bo);
+   brw_bo_unreference(image->bo);
    free(image);
 }
 
+enum modifier_priority {
+   MODIFIER_PRIORITY_INVALID = 0,
+   MODIFIER_PRIORITY_LINEAR,
+   MODIFIER_PRIORITY_X,
+   MODIFIER_PRIORITY_Y,
+};
+
+const uint64_t priority_to_modifier[] = {
+   [MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID,
+   [MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR,
+   [MODIFIER_PRIORITY_X] = I915_FORMAT_MOD_X_TILED,
+   [MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED,
+};
+
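+/* Pick the most capable modifier offered by the caller: Y-tiled over X-tiled
+ * over linear.  Returns DRM_FORMAT_MOD_INVALID if none of them is supported.
+ */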
+static uint64_t
+select_best_modifier(struct gen_device_info *devinfo,
+                     const uint64_t *modifiers,
+                     const unsigned count)
+{
+   enum modifier_priority prio = MODIFIER_PRIORITY_INVALID;
+
+   for (int i = 0; i < count; i++) {
+      switch (modifiers[i]) {
+      case I915_FORMAT_MOD_Y_TILED:
+         prio = MAX2(prio, MODIFIER_PRIORITY_Y);
+         break;
+      case I915_FORMAT_MOD_X_TILED:
+         prio = MAX2(prio, MODIFIER_PRIORITY_X);
+         break;
+      case DRM_FORMAT_MOD_LINEAR:
+         prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR);
+         break;
+      case DRM_FORMAT_MOD_INVALID:
+      default:
+         break;
+      }
+   }
+
+   return priority_to_modifier[prio];
+}
+
 static __DRIimage *
-intel_create_image(__DRIscreen *dri_screen,
-                  int width, int height, int format,
-                  unsigned int use,
-                  void *loaderPrivate)
+intel_create_image_common(__DRIscreen *dri_screen,
+                          int width, int height, int format,
+                          unsigned int use,
+                          const uint64_t *modifiers,
+                          unsigned count,
+                          void *loaderPrivate)
 {
    __DRIimage *image;
    struct intel_screen *screen = dri_screen->driverPrivate;
    uint32_t tiling;
+   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
+   unsigned tiled_height;
    int cpp;
-   unsigned long pitch;
 
-   tiling = I915_TILING_X;
+   /* Callers of this may specify a modifier, or a dri usage, but not both. The
+    * newer modifier interface deprecates the older usage flags.
+    */
+   assert(!(use && count));
+
    if (use & __DRI_IMAGE_USE_CURSOR) {
       if (width != 64 || height != 64)
         return NULL;
-      tiling = I915_TILING_NONE;
+      modifier = DRM_FORMAT_MOD_LINEAR;
    }
 
    if (use & __DRI_IMAGE_USE_LINEAR)
-      tiling = I915_TILING_NONE;
+      modifier = DRM_FORMAT_MOD_LINEAR;
+
+   if (modifier == DRM_FORMAT_MOD_INVALID) {
+      if (modifiers) {
+         /* User requested specific modifiers */
+         modifier = select_best_modifier(&screen->devinfo, modifiers, count);
+         if (modifier == DRM_FORMAT_MOD_INVALID)
+            return NULL;
+      } else {
+         /* Historically, X-tiled was the default, and so lack of modifier means
+          * X-tiled.
+          */
+         modifier = I915_FORMAT_MOD_X_TILED;
+      }
+   }
+   tiling = modifier_to_tiling(modifier);
+   tiled_height = get_tiled_height(modifier, height);
 
-   image = intel_allocate_image(format, loaderPrivate);
+   image = intel_allocate_image(screen, format, loaderPrivate);
    if (image == NULL)
       return NULL;
 
    cpp = _mesa_get_format_bytes(image->format);
-   image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, "image",
-                                        width, height, cpp, &tiling,
-                                        &pitch, 0);
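+   /* Allocate with the tile-aligned height so the BO spans whole tiles, but
+    * keep reporting the caller's logical height below.
+    */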
+   image->bo = brw_bo_alloc_tiled(screen->bufmgr, "image",
+                                  width, tiled_height, cpp, tiling,
+                                  &image->pitch, 0);
    if (image->bo == NULL) {
       free(image);
       return NULL;
    }
    image->width = width;
    image->height = height;
-   image->pitch = pitch;
+   image->modifier = modifier;
 
    return image;
 }
 
+static __DRIimage *
+intel_create_image(__DRIscreen *dri_screen,
+                  int width, int height, int format,
+                  unsigned int use,
+                  void *loaderPrivate)
+{
+   return intel_create_image_common(dri_screen, width, height, format, use,
+                                    NULL, 0, loaderPrivate);
+}
+
+static __DRIimage *
+intel_create_image_with_modifiers(__DRIscreen *dri_screen,
+                                  int width, int height, int format,
+                                  const uint64_t *modifiers,
+                                  const unsigned count,
+                                  void *loaderPrivate)
+{
+   return intel_create_image_common(dri_screen, width, height, format, 0,
+                                    modifiers, count, loaderPrivate);
+}
+
 static GLboolean
 intel_query_image(__DRIimage *image, int attrib, int *value)
 {
@@ -595,10 +708,10 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
       *value = image->pitch;
       return true;
    case __DRI_IMAGE_ATTRIB_HANDLE:
-      *value = image->bo->handle;
+      *value = image->bo->gem_handle;
       return true;
    case __DRI_IMAGE_ATTRIB_NAME:
-      return !drm_intel_bo_flink(image->bo, (uint32_t *) value);
+      return !brw_bo_flink(image->bo, (uint32_t *) value);
    case __DRI_IMAGE_ATTRIB_FORMAT:
       *value = image->dri_format;
       return true;
@@ -614,7 +727,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
       *value = image->planar_format->components;
       return true;
    case __DRI_IMAGE_ATTRIB_FD:
-      return !drm_intel_bo_gem_export_to_prime(image->bo, value);
+      return !brw_bo_gem_export_to_prime(image->bo, value);
    case __DRI_IMAGE_ATTRIB_FOURCC:
       return intel_lookup_fourcc(image->dri_format, value);
    case __DRI_IMAGE_ATTRIB_NUM_PLANES:
@@ -623,6 +736,12 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
    case __DRI_IMAGE_ATTRIB_OFFSET:
       *value = image->offset;
       return true;
+   case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER:
+      *value = (image->modifier & 0xffffffff);
+      return true;
+   case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER:
+      *value = ((image->modifier >> 32) & 0xffffffff);
+      return true;
 
   default:
       return false;
@@ -638,12 +757,13 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
    if (image == NULL)
       return NULL;
 
-   drm_intel_bo_reference(orig_image->bo);
+   brw_bo_reference(orig_image->bo);
    image->bo              = orig_image->bo;
    image->internal_format = orig_image->internal_format;
    image->planar_format   = orig_image->planar_format;
    image->dri_format      = orig_image->dri_format;
    image->format          = orig_image->format;
+   image->modifier        = orig_image->modifier;
    image->offset          = orig_image->offset;
    image->width           = orig_image->width;
    image->height          = orig_image->height;
@@ -715,6 +835,7 @@ intel_create_image_from_fds(__DRIscreen *dri_screen,
    struct intel_screen *screen = dri_screen->driverPrivate;
    struct intel_image_format *f;
    __DRIimage *image;
+   unsigned tiled_height;
    int i, index;
 
    if (fds == NULL || num_fds < 1)
@@ -730,9 +851,11 @@ intel_create_image_from_fds(__DRIscreen *dri_screen,
       return NULL;
 
    if (f->nplanes == 1)
-      image = intel_allocate_image(f->planes[0].dri_format, loaderPrivate);
+      image = intel_allocate_image(screen, f->planes[0].dri_format,
+                                   loaderPrivate);
    else
-      image = intel_allocate_image(__DRI_IMAGE_FORMAT_NONE, loaderPrivate);
+      image = intel_allocate_image(screen, __DRI_IMAGE_FORMAT_NONE,
+                                   loaderPrivate);
 
    if (image == NULL)
       return NULL;
@@ -742,21 +865,34 @@ intel_create_image_from_fds(__DRIscreen *dri_screen,
    image->pitch = strides[0];
 
    image->planar_format = f;
+
+   image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
+   if (image->bo == NULL) {
+      free(image);
+      return NULL;
+   }
+
+   image->modifier = tiling_to_modifier(image->bo->tiling_mode);
+   tiled_height = get_tiled_height(image->modifier, height);
+
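+   /* Compute each plane's extent from the tile-aligned height so the size
+    * check below accounts for the padding rows required by the tiling layout.
+    */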
    int size = 0;
    for (i = 0; i < f->nplanes; i++) {
       index = f->planes[i].buffer_index;
       image->offsets[index] = offsets[index];
       image->strides[index] = strides[index];
 
-      const int plane_height = height >> f->planes[i].height_shift;
+      const int plane_height = tiled_height >> f->planes[i].height_shift;
       const int end = offsets[index] + plane_height * strides[index];
       if (size < end)
          size = end;
    }
 
-   image->bo = drm_intel_bo_gem_create_from_prime(screen->bufmgr,
-                                                  fds[0], size);
-   if (image->bo == NULL) {
+   /* Check that the requested image actually fits within the BO. 'size'
+    * is already relative to the offsets, so we don't need to add that. */
+   if (image->bo->size == 0) {
+      image->bo->size = size;
+   } else if (size > image->bo->size) {
+      brw_bo_unreference(image->bo);
       free(image);
       return NULL;
    }
@@ -835,7 +971,7 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
     offset = parent->offsets[index];
     stride = parent->strides[index];
 
-    image = intel_allocate_image(dri_format, loaderPrivate);
+    image = intel_allocate_image(parent->screen, dri_format, loaderPrivate);
     if (image == NULL)
        return NULL;
 
@@ -846,7 +982,8 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
     }
 
     image->bo = parent->bo;
-    drm_intel_bo_reference(parent->bo);
+    brw_bo_reference(parent->bo);
+    image->modifier = parent->modifier;
 
     image->width = width;
     image->height = height;
@@ -859,7 +996,7 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
 }
 
 static const __DRIimageExtension intelImageExtension = {
-    .base = { __DRI_IMAGE, 13 },
+    .base = { __DRI_IMAGE, 14 },
 
     .createImageFromName                = intel_create_image_from_name,
     .createImageFromRenderbuffer        = intel_create_image_from_renderbuffer,
@@ -877,8 +1014,20 @@ static const __DRIimageExtension intelImageExtension = {
     .getCapabilities                    = NULL,
     .mapImage                           = NULL,
     .unmapImage                         = NULL,
+    .createImageWithModifiers           = intel_create_image_with_modifiers,
 };
 
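+/* Query the total GTT aperture size directly via
+ * DRM_IOCTL_I915_GEM_GET_APERTURE, returning 0 if the ioctl fails.
+ */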
+static uint64_t
+get_aperture_size(int fd)
+{
+   struct drm_i915_gem_get_aperture aperture;
+
+   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) != 0)
+      return 0;
+
+   return aperture.aper_size;
+}
+
 static int
 brw_query_renderer_integer(__DRIscreen *dri_screen,
                            int param, unsigned int *value)
@@ -901,13 +1050,8 @@ brw_query_renderer_integer(__DRIscreen *dri_screen,
        * assume that there's some fragmentation, and we start doing extra
        * flushing, etc.  That's the big cliff apps will care about.
        */
-      size_t aper_size;
-      size_t mappable_size;
-
-      drm_intel_get_aperture_sizes(dri_screen->fd, &mappable_size, &aper_size);
-
       const unsigned gpu_mappable_megabytes =
-         (aper_size / (1024 * 1024)) * 3 / 4;
+         screen->aperture_threshold / (1024 * 1024);
 
       const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
       const long system_page_size = sysconf(_SC_PAGE_SIZE);
@@ -1032,7 +1176,7 @@ intelDestroyScreen(__DRIscreen * sPriv)
 {
    struct intel_screen *screen = sPriv->driverPrivate;
 
-   dri_bufmgr_destroy(screen->bufmgr);
+   brw_bufmgr_destroy(screen->bufmgr);
    driDestroyOptionInfo(&screen->optionCache);
 
    ralloc_free(screen);
@@ -1041,7 +1185,11 @@ intelDestroyScreen(__DRIscreen * sPriv)
 
 
 /**
- * This is called when we need to set up GL rendering to a new X window.
+ * Create a gl_framebuffer and attach it to __DRIdrawable::driverPrivate.
+ *
+ * This implements driDriverAPI::createNewDrawable, which the DRI layer calls
+ * when creating an EGLSurface, GLXDrawable, or GLXPixmap. Despite the name,
+ * this does not allocate GPU memory.
  */
 static GLboolean
 intelCreateBuffer(__DRIscreen *dri_screen,
@@ -1054,12 +1202,11 @@ intelCreateBuffer(__DRIscreen *dri_screen,
    mesa_format rgbFormat;
    unsigned num_samples =
       intel_quantize_num_samples(screen, mesaVis->samples);
-   struct gl_framebuffer *fb;
 
    if (isPixmap)
       return false;
 
-   fb = CALLOC_STRUCT(gl_framebuffer);
+   struct gl_framebuffer *fb = CALLOC_STRUCT(gl_framebuffer);
    if (!fb)
       return false;
 
@@ -1086,12 +1233,12 @@ intelCreateBuffer(__DRIscreen *dri_screen,
    }
 
    /* setup the hardware-based renderbuffers */
-   rb = intel_create_renderbuffer(rgbFormat, num_samples);
-   _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base.Base);
+   rb = intel_create_winsys_renderbuffer(rgbFormat, num_samples);
+   _mesa_attach_and_own_rb(fb, BUFFER_FRONT_LEFT, &rb->Base.Base);
 
    if (mesaVis->doubleBufferMode) {
-      rb = intel_create_renderbuffer(rgbFormat, num_samples);
-      _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base.Base);
+      rb = intel_create_winsys_renderbuffer(rgbFormat, num_samples);
+      _mesa_attach_and_own_rb(fb, BUFFER_BACK_LEFT, &rb->Base.Base);
    }
 
    /*
@@ -1105,10 +1252,10 @@ intelCreateBuffer(__DRIscreen *dri_screen,
       if (screen->devinfo.has_hiz_and_separate_stencil) {
          rb = intel_create_private_renderbuffer(MESA_FORMAT_Z24_UNORM_X8_UINT,
                                                 num_samples);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
+         _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base);
          rb = intel_create_private_renderbuffer(MESA_FORMAT_S_UINT8,
                                                 num_samples);
-         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base);
+         _mesa_attach_and_own_rb(fb, BUFFER_STENCIL, &rb->Base.Base);
       } else {
          /*
           * Use combined depth/stencil. Note that the renderbuffer is
@@ -1116,15 +1263,15 @@ intelCreateBuffer(__DRIscreen *dri_screen,
           */
          rb = intel_create_private_renderbuffer(MESA_FORMAT_Z24_UNORM_S8_UINT,
                                                 num_samples);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
-         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base);
+         _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base);
+         _mesa_attach_and_reference_rb(fb, BUFFER_STENCIL, &rb->Base.Base);
       }
    }
    else if (mesaVis->depthBits == 16) {
       assert(mesaVis->stencilBits == 0);
       rb = intel_create_private_renderbuffer(MESA_FORMAT_Z_UNORM16,
                                              num_samples);
-      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
+      _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base);
    }
    else {
       assert(mesaVis->depthBits == 0);
@@ -1192,19 +1339,18 @@ intel_init_bufmgr(struct intel_screen *screen)
 {
    __DRIscreen *dri_screen = screen->driScrnPriv;
 
-   screen->no_hw = getenv("INTEL_NO_HW") != NULL;
+   if (getenv("INTEL_NO_HW") != NULL)
+      screen->no_hw = true;
 
-   screen->bufmgr = intel_bufmgr_gem_init(dri_screen->fd, BATCH_SZ);
+   screen->bufmgr = brw_bufmgr_init(&screen->devinfo, dri_screen->fd, BATCH_SZ);
    if (screen->bufmgr == NULL) {
       fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
              __func__, __LINE__);
       return false;
    }
 
-   drm_intel_bufmgr_gem_enable_fenced_relocs(screen->bufmgr);
-
-   if (!intel_get_boolean(screen, I915_PARAM_HAS_RELAXED_DELTA)) {
-      fprintf(stderr, "[%s: %u] Kernel 2.6.39 required.\n", __func__, __LINE__);
+   if (!intel_get_boolean(screen, I915_PARAM_HAS_WAIT_TIMEOUT)) {
+      fprintf(stderr, "[%s: %u] Kernel 3.6 required.\n", __func__, __LINE__);
       return false;
    }
 
@@ -1214,20 +1360,19 @@ intel_init_bufmgr(struct intel_screen *screen)
 static bool
 intel_detect_swizzling(struct intel_screen *screen)
 {
-   drm_intel_bo *buffer;
-   unsigned long flags = 0;
-   unsigned long aligned_pitch;
+   struct brw_bo *buffer;
+   unsigned flags = 0;
+   uint32_t aligned_pitch;
    uint32_t tiling = I915_TILING_X;
    uint32_t swizzle_mode = 0;
 
-   buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test",
-                                    64, 64, 4,
-                                    &tiling, &aligned_pitch, flags);
+   buffer = brw_bo_alloc_tiled(screen->bufmgr, "swizzle test",
+                               64, 64, 4, tiling, &aligned_pitch, flags);
    if (buffer == NULL)
       return false;
 
-   drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode);
-   drm_intel_bo_unreference(buffer);
+   brw_bo_get_tiling(buffer, &tiling, &swizzle_mode);
+   brw_bo_unreference(buffer);
 
    if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE)
       return false;
@@ -1247,13 +1392,13 @@ intel_detect_timestamp(struct intel_screen *screen)
     * More recent kernels offer an interface to read the full 36bits
     * everywhere.
     */
-   if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0)
+   if (brw_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0)
       return 3;
 
    /* Determine if we have a 32bit or 64bit kernel by inspecting the
     * upper 32bits for a rapidly changing timestamp.
     */
-   if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &last))
+   if (brw_reg_read(screen->bufmgr, TIMESTAMP, &last))
       return 0;
 
    upper = lower = 0;
@@ -1261,7 +1406,7 @@ intel_detect_timestamp(struct intel_screen *screen)
       /* The TIMESTAMP should change every 80ns, so several round trips
        * through the kernel should be enough to advance it.
        */
-      if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &dummy))
+      if (brw_reg_read(screen->bufmgr, TIMESTAMP, &dummy))
          return 0;
 
       upper += (dummy >> 32) != (last >> 32);
@@ -1290,24 +1435,29 @@ static bool
 intel_detect_pipelined_register(struct intel_screen *screen,
                                 int reg, uint32_t expected_value, bool reset)
 {
-   drm_intel_bo *results, *bo;
+   if (screen->no_hw)
+      return false;
+
+   struct brw_bo *results, *bo;
    uint32_t *batch;
    uint32_t offset = 0;
+   void *map;
    bool success = false;
 
    /* Create a zero'ed temporary buffer for reading our results */
-   results = drm_intel_bo_alloc(screen->bufmgr, "registers", 4096, 0);
+   results = brw_bo_alloc(screen->bufmgr, "registers", 4096, 0);
    if (results == NULL)
       goto err;
 
-   bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
+   bo = brw_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
    if (bo == NULL)
       goto err_results;
 
-   if (drm_intel_bo_map(bo, 1))
+   map = brw_bo_map(NULL, bo, MAP_WRITE);
+   if (!map)
       goto err_batch;
 
-   batch = bo->virtual;
+   batch = map;
 
    /* Write the register. */
    *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
@@ -1317,11 +1467,14 @@ intel_detect_pipelined_register(struct intel_screen *screen,
    /* Save the register's value back to the buffer. */
    *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
    *batch++ = reg;
-   drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)bo->virtual,
-                           results, offset*sizeof(uint32_t),
-                           I915_GEM_DOMAIN_INSTRUCTION,
-                           I915_GEM_DOMAIN_INSTRUCTION);
-   *batch++ = results->offset + offset*sizeof(uint32_t);
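+   /* Hand-roll the relocation: we don't know where the kernel will place
+    * 'results', so emit presumed_offset (zero) plus the delta and let the
+    * kernel patch the batch when it processes the relocation entry.
+    */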
+   struct drm_i915_gem_relocation_entry reloc = {
+      .offset = (char *) batch - (char *) map,
+      .delta = offset * sizeof(uint32_t),
+      .target_handle = results->gem_handle,
+      .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+      .write_domain = I915_GEM_DOMAIN_INSTRUCTION,
+   };
+   *batch++ = reloc.presumed_offset + reloc.delta;
 
    /* And afterwards clear the register */
    if (reset) {
@@ -1332,20 +1485,41 @@ intel_detect_pipelined_register(struct intel_screen *screen,
 
    *batch++ = MI_BATCH_BUFFER_END;
 
-   drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8),
-                         NULL, 0, 0,
-                         I915_EXEC_RENDER);
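+   /* Submit the batch directly with DRM_IOCTL_I915_GEM_EXECBUFFER2; this runs
+    * during screen setup, before any batchbuffer infrastructure exists.
+    */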
+   struct drm_i915_gem_exec_object2 exec_objects[2] = {
+      {
+         .handle = results->gem_handle,
+      },
+      {
+         .handle = bo->gem_handle,
+         .relocation_count = 1,
+         .relocs_ptr = (uintptr_t) &reloc,
+      }
+   };
+
+   struct drm_i915_gem_execbuffer2 execbuf = {
+      .buffers_ptr = (uintptr_t) exec_objects,
+      .buffer_count = 2,
+      .batch_len = ALIGN((char *) batch - (char *) map, 8),
+      .flags = I915_EXEC_RENDER,
+   };
+
+   /* Don't bother with error checking - if the execbuf fails, the
+    * value won't be written and we'll just report that there's no access.
+    */
+   __DRIscreen *dri_screen = screen->driScrnPriv;
+   drmIoctl(dri_screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 
    /* Check whether the value got written. */
-   if (drm_intel_bo_map(results, false) == 0) {
-      success = *((uint32_t *)results->virtual + offset) == expected_value;
-      drm_intel_bo_unmap(results);
+   void *results_map = brw_bo_map(NULL, results, MAP_READ);
+   if (results_map) {
+      success = *((uint32_t *)results_map + offset) == expected_value;
+      brw_bo_unmap(results);
    }
 
 err_batch:
-   drm_intel_bo_unreference(bo);
+   brw_bo_unreference(bo);
 err_results:
-   drm_intel_bo_unreference(results);
+   brw_bo_unreference(results);
 err:
    return success;
 }
@@ -1353,13 +1527,19 @@ err:
 static bool
 intel_detect_pipelined_so(struct intel_screen *screen)
 {
+   const struct gen_device_info *devinfo = &screen->devinfo;
+
    /* Supposedly, Broadwell just works. */
-   if (screen->devinfo.gen >= 8)
+   if (devinfo->gen >= 8)
       return true;
 
-   if (screen->devinfo.gen <= 6)
+   if (devinfo->gen <= 6)
       return false;
 
+   /* See the big explanation about command parser versions below */
+   if (screen->cmd_parser_version >= (devinfo->is_haswell ? 7 : 2))
+      return true;
+
    /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the
     * statistics registers), and we already reset it to zero before using it.
     */
@@ -1546,8 +1726,14 @@ set_max_gl_versions(struct intel_screen *screen)
       dri_screen->max_gl_es2_version = has_astc ? 32 : 31;
       break;
    case 7:
-      dri_screen->max_gl_core_version = screen->devinfo.is_haswell &&
-         can_do_pipelined_register_writes(screen) ? 45 : 33;
+      dri_screen->max_gl_core_version = 33;
+      if (can_do_pipelined_register_writes(screen)) {
+         dri_screen->max_gl_core_version = 42;
+         if (screen->devinfo.is_haswell && can_do_compute_dispatch(screen))
+            dri_screen->max_gl_core_version = 43;
+         if (screen->devinfo.is_haswell && can_do_mi_math_and_lrr(screen))
+            dri_screen->max_gl_core_version = 45;
+      }
       dri_screen->max_gl_compat_version = 30;
       dri_screen->max_gl_es1_version = 11;
       dri_screen->max_gl_es2_version = screen->devinfo.is_haswell ? 31 : 30;
@@ -1637,6 +1823,53 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
    va_end(args);
 }
 
+static int
+parse_devid_override(const char *devid_override)
+{
+   static const struct {
+      const char *name;
+      int pci_id;
+   } name_map[] = {
+      { "brw", 0x2a02 },
+      { "g4x", 0x2a42 },
+      { "ilk", 0x0042 },
+      { "snb", 0x0126 },
+      { "ivb", 0x016a },
+      { "hsw", 0x0d2e },
+      { "byt", 0x0f33 },
+      { "bdw", 0x162e },
+      { "skl", 0x1912 },
+      { "kbl", 0x5912 },
+   };
+
+   for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
+      if (!strcmp(name_map[i].name, devid_override))
+         return name_map[i].pci_id;
+   }
+
+   return strtod(devid_override, NULL);
+}
+
+/**
+ * Get the PCI ID for the device.  This can be overridden by setting the
+ * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
+ *
+ * Returns -1 on ioctl failure.
+ */
+static int
+get_pci_device_id(struct intel_screen *screen)
+{
+   if (geteuid() == getuid()) {
+      char *devid_override = getenv("INTEL_DEVID_OVERRIDE");
+      if (devid_override) {
+         screen->no_hw = true;
+         return parse_devid_override(devid_override);
+      }
+   }
+
+   return intel_get_integer(screen, I915_PARAM_CHIPSET_ID);
+}
+
 /**
  * This is the driver specific part of the createNewScreen entry point.
  * Called when using DRI2.
@@ -1654,14 +1887,14 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
       fprintf(stderr,
              "\nERROR!  DRI2 loader with getBuffersWithFormat() "
              "support required\n");
-      return false;
+      return NULL;
    }
 
    /* Allocate the private area */
    screen = rzalloc(NULL, struct intel_screen);
    if (!screen) {
       fprintf(stderr, "\nERROR!  Allocating private area failed\n");
-      return false;
+      return NULL;
    }
    /* parse information in __driConfigOptions */
    driParseOptionInfo(&screen->optionCache, brw_config_options.xml);
@@ -1669,36 +1902,31 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
    screen->driScrnPriv = dri_screen;
    dri_screen->driverPrivate = (void *) screen;
 
-   if (!intel_init_bufmgr(screen))
-       return false;
+   screen->deviceID = get_pci_device_id(screen);
 
-   screen->deviceID = drm_intel_bufmgr_gem_get_devid(screen->bufmgr);
    if (!gen_get_device_info(screen->deviceID, &screen->devinfo))
-      return false;
+      return NULL;
+
+   if (!intel_init_bufmgr(screen))
+       return NULL;
 
    const struct gen_device_info *devinfo = &screen->devinfo;
 
    brw_process_intel_debug_variable();
 
-   if (INTEL_DEBUG & DEBUG_BUFMGR)
-      dri_bufmgr_set_debug(screen->bufmgr, true);
-
    if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && devinfo->gen < 7) {
       fprintf(stderr,
               "shader_time debugging requires gen7 (Ivybridge) or better.\n");
       INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
    }
 
-   if (INTEL_DEBUG & DEBUG_AUB)
-      drm_intel_bufmgr_gem_set_aub_dump(screen->bufmgr, true);
-
    if (intel_get_integer(screen, I915_PARAM_MMAP_GTT_VERSION) >= 1) {
       /* Theoretically unlimited! At least for individual objects...
        *
        * Currently the entire (global) address space for all GTT maps is
        * limited to 64bits. That is all objects on the system that are
        * setup for GTT mmapping must fit within 64bits. An attempt to use
-       * one that exceeds the limit with fail in drm_intel_bo_map_gtt().
+       * one that exceeds the limit will fail in brw_bo_map_gtt().
        *
        * Long before we hit that limit, we will be practically limited by
        * that any single object must fit in physical memory (RAM). The upper
@@ -1726,6 +1954,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
       screen->max_gtt_map_object_size = gtt_size / 4;
    }
 
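+   /* Treat roughly 3/4 of the aperture as safely usable; beyond that we
+    * assume fragmentation and extra flushing (see the comment in
+    * brw_query_renderer_integer).
+    */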
+   screen->aperture_threshold = get_aperture_size(dri_screen->fd) * 3 / 4;
+
    screen->hw_has_swizzling = intel_detect_swizzling(screen);
    screen->hw_has_timestamp = intel_detect_timestamp(screen);
 
@@ -1736,8 +1966,147 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
       screen->subslice_total = 1 << (devinfo->gt - 1);
    }
 
-   if (intel_detect_pipelined_so(screen))
+   /* Gen7-7.5 kernel requirements / command parser saga:
+    *
+    * - pre-v3.16:
+    *   Haswell and Baytrail cannot use any privileged batchbuffer features.
+    *
+    *   Ivybridge has aliasing PPGTT on by default, which accidentally marks
+    *   all batches secure, allowing them to use any feature with no checking.
+    *   This is effectively equivalent to a command parser version of
+    *   \infinity - everything is possible.
+    *
+    *   The command parser does not exist, and querying the version will
+    *   return -EINVAL.
+    *
+    * - v3.16:
+    *   The kernel enables the command parser by default, for systems with
+    *   aliasing PPGTT enabled (Ivybridge and Haswell).  However, the
+    *   hardware checker is still enabled, so Haswell and Baytrail cannot
+    *   do anything.
+    *
+    *   Ivybridge goes from "everything is possible" to "only what the
+    *   command parser allows" (if the user boots with i915.cmd_parser=0,
+    *   then everything is possible again).  We can only safely use features
+    *   allowed by the supported command parser version.
+    *
+    *   Annoyingly, I915_PARAM_CMD_PARSER_VERSION reports the static version
+    *   implemented by the kernel, even if it's turned off.  So, checking
+    *   for version > 0 does not mean that you can write registers.  We have
+    *   to try it and see.  The version does, however, indicate the age of
+    *   the kernel.
+    *
+    *   Instead of matching the hardware checker's behavior of converting
+    *   privileged commands to MI_NOOP, it makes execbuf2 start returning
+    *   -EINVAL, making it dangerous to try and use privileged features.
+    *
+    *   Effective command parser versions:
+    *   - Haswell:   0 (reporting 1, writes don't work)
+    *   - Baytrail:  0 (reporting 1, writes don't work)
+    *   - Ivybridge: 1 (enabled) or infinite (disabled)
+    *
+    * - v3.17:
+    *   Baytrail aliasing PPGTT is enabled, making it like Ivybridge:
+    *   effectively version 1 (enabled) or infinite (disabled).
+    *
+    * - v3.19: f1f55cc0556031c8ee3fe99dae7251e78b9b653b
+    *   Command parser v2 supports predicate writes.
+    *
+    *   - Haswell:   0 (reporting 1, writes don't work)
+    *   - Baytrail:  2 (enabled) or infinite (disabled)
+    *   - Ivybridge: 2 (enabled) or infinite (disabled)
+    *
+    *   So version >= 2 is enough to know that Ivybridge and Baytrail
+    *   will work.  Haswell still can't do anything.
+    *
+    * - v4.0: Version 3 happened.  Largely not relevant.
+    *
+    * - v4.1: 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b
+    *   L3 config registers are properly saved and restored as part
+    *   of the hardware context.  We can approximately detect this point
+    *   in time by checking if I915_PARAM_REVISION is recognized - it
+    *   landed in a later commit, but in the same release cycle.
+    *
+    * - v4.2: 245054a1fe33c06ad233e0d58a27ec7b64db9284
+    *   Command parser finally gains secure batch promotion.  On Haswell,
+    *   the hardware checker gets disabled, which finally allows it to do
+    *   privileged commands.
+    *
+    *   I915_PARAM_CMD_PARSER_VERSION reports 3.  Effective versions:
+    *   - Haswell:   3 (enabled) or 0 (disabled)
+    *   - Baytrail:  3 (enabled) or infinite (disabled)
+    *   - Ivybridge: 3 (enabled) or infinite (disabled)
+    *
+    *   Unfortunately, detecting this point in time is tricky, because
+    *   no version bump happened when this important change occurred.
+    *   On Haswell, if we can write any register, then the kernel is at
+    *   least this new, and we can start trusting the version number.
+    *
+    * - v4.4: 2bbe6bbb0dc94fd4ce287bdac9e1bd184e23057b and
+    *   Command parser reaches version 4, allowing access to Haswell
+    *   atomic scratch and chicken3 registers.  If version >= 4, we know
+    *   the kernel is new enough to support privileged features on all
+    *   hardware.  However, the user might have disabled it...and the
+    *   kernel will still report version 4.  So we still have to guess
+    *   and check.
+    *
+    * - v4.4: 7b9748cb513a6bef4af87b79f0da3ff7e8b56cd8
+    *   Command parser v5 whitelists indirect compute shader dispatch
+    *   registers, needed for OpenGL 4.3 and later.
+    *
+    * - v4.8:
+    *   Command parser v7 lets us use MI_MATH on Haswell.
+    *
+    *   Additionally, the kernel begins reporting version 0 when
+    *   the command parser is disabled, allowing us to skip the
+    *   guess-and-check step on Haswell.  Unfortunately, this also
+    *   means that we can no longer use it as an indicator of the
+    *   age of the kernel.
+    */
+   if (intel_get_param(screen, I915_PARAM_CMD_PARSER_VERSION,
+                       &screen->cmd_parser_version) < 0) {
+      /* Command parser does not exist - getparam is unrecognized */
+      screen->cmd_parser_version = 0;
+   }
+
+   /* Kernel 4.13 required for exec object capture */
+#ifndef I915_PARAM_HAS_EXEC_CAPTURE
+#define I915_PARAM_HAS_EXEC_CAPTURE 45
+#endif
+   if (intel_get_boolean(screen, I915_PARAM_HAS_EXEC_CAPTURE)) {
+      screen->kernel_features |= KERNEL_ALLOWS_EXEC_CAPTURE;
+   }
+
+   if (!intel_detect_pipelined_so(screen)) {
+      /* We can't do anything, so the effective version is 0. */
+      screen->cmd_parser_version = 0;
+   } else {
       screen->kernel_features |= KERNEL_ALLOWS_SOL_OFFSET_WRITES;
+   }
+
+   if (devinfo->gen >= 8 || screen->cmd_parser_version >= 2)
+      screen->kernel_features |= KERNEL_ALLOWS_PREDICATE_WRITES;
+
+   /* Haswell requires command parser version 4 in order to have L3
+    * atomic scratch1 and chicken3 bits
+    */
+   if (devinfo->is_haswell && screen->cmd_parser_version >= 4) {
+      screen->kernel_features |=
+         KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3;
+   }
+
+   /* Haswell requires command parser version 6 in order to write to the
+    * MI_MATH GPR registers, and version 7 in order to use
+    * MI_LOAD_REGISTER_REG (which all users of MI_MATH use).
+    */
+   if (devinfo->gen >= 8 ||
+       (devinfo->is_haswell && screen->cmd_parser_version >= 7)) {
+      screen->kernel_features |= KERNEL_ALLOWS_MI_MATH_AND_LRR;
+   }
+
+   /* Gen7 needs at least command parser version 5 to support compute */
+   if (devinfo->gen >= 8 || screen->cmd_parser_version >= 5)
+      screen->kernel_features |= KERNEL_ALLOWS_COMPUTE_DISPATCH;
 
    const char *force_msaa = getenv("INTEL_FORCE_MSAA");
    if (force_msaa) {
@@ -1770,35 +2139,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
          (ret != -1 || errno != EINVAL);
    }
 
-   if (intel_get_param(screen, I915_PARAM_CMD_PARSER_VERSION,
-                       &screen->cmd_parser_version) < 0) {
-      screen->cmd_parser_version = 0;
-   }
-
-   if (devinfo->gen >= 8 || screen->cmd_parser_version >= 2)
-      screen->kernel_features |= KERNEL_ALLOWS_PREDICATE_WRITES;
-
-   /* Haswell requires command parser version 4 in order to have L3
-    * atomic scratch1 and chicken3 bits
-    */
-   if (devinfo->is_haswell && screen->cmd_parser_version >= 4) {
-      screen->kernel_features |=
-         KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3;
-   }
-
-   /* Haswell requires command parser version 6 in order to write to the
-    * MI_MATH GPR registers, and version 7 in order to use
-    * MI_LOAD_REGISTER_REG (which all users of MI_MATH use).
-    */
-   if (devinfo->gen >= 8 ||
-       (devinfo->is_haswell && screen->cmd_parser_version >= 7)) {
-      screen->kernel_features |= KERNEL_ALLOWS_MI_MATH_AND_LRR;
-   }
-
-   /* Gen7 needs at least command parser version 5 to support compute */
-   if (devinfo->gen >= 8 || screen->cmd_parser_version >= 5)
-      screen->kernel_features |= KERNEL_ALLOWS_COMPUTE_DISPATCH;
-
    dri_screen->extensions = !screen->has_context_reset_notification
       ? screenExtensions : intelRobustScreenExtensions;
 
@@ -1815,7 +2155,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
 
 struct intel_buffer {
    __DRIbuffer base;
-   drm_intel_bo *bo;
+   struct brw_bo *bo;
 };
 
 static __DRIbuffer *
@@ -1833,24 +2173,25 @@ intelAllocateBuffer(__DRIscreen *dri_screen,
    if (intelBuffer == NULL)
       return NULL;
 
-   /* The front and back buffers are color buffers, which are X tiled. */
-   uint32_t tiling = I915_TILING_X;
-   unsigned long pitch;
+   /* The front and back buffers are color buffers, which are X tiled. GEN9+
+    * supports Y tiled and compressed buffers, but there is no way to plumb that
+    * through to here. */
+   uint32_t pitch;
    int cpp = format / 8;
-   intelBuffer->bo = drm_intel_bo_alloc_tiled(screen->bufmgr,
-                                              "intelAllocateBuffer",
-                                              width,
-                                              height,
-                                              cpp,
-                                              &tiling, &pitch,
-                                              BO_ALLOC_FOR_RENDER);
+   intelBuffer->bo = brw_bo_alloc_tiled(screen->bufmgr,
+                                        "intelAllocateBuffer",
+                                        width,
+                                        height,
+                                        cpp,
+                                        I915_TILING_X, &pitch,
+                                        BO_ALLOC_FOR_RENDER);
 
    if (intelBuffer->bo == NULL) {
           free(intelBuffer);
           return NULL;
    }
 
-   drm_intel_bo_flink(intelBuffer->bo, &intelBuffer->base.name);
+   brw_bo_flink(intelBuffer->bo, &intelBuffer->base.name);
 
    intelBuffer->base.attachment = attachment;
    intelBuffer->base.cpp = cpp;
@@ -1864,7 +2205,7 @@ intelReleaseBuffer(__DRIscreen *dri_screen, __DRIbuffer *buffer)
 {
    struct intel_buffer *intelBuffer = (struct intel_buffer *) buffer;
 
-   drm_intel_bo_unreference(intelBuffer->bo);
+   brw_bo_unreference(intelBuffer->bo);
    free(intelBuffer);
 }