i965: split EU defines to brw_eu_defines.h
[mesa.git] / src / mesa / drivers / dri / i965 / intel_screen.c
index c580b9700e1bc6f29685baf1db62a3ad1997cf42..21786eb54abe0c2f8c14a9e62cc7a29854a4b4fd 100644 (file)
@@ -35,7 +35,7 @@
 #include "main/version.h"
 #include "swrast/s_renderbuffer.h"
 #include "util/ralloc.h"
-#include "brw_shader.h"
+#include "brw_defines.h"
 #include "compiler/nir/nir.h"
 
 #include "utils.h"
@@ -79,10 +79,12 @@ DRI_CONF_BEGIN
       DRI_CONF_ALWAYS_FLUSH_CACHE("false")
       DRI_CONF_DISABLE_THROTTLING("false")
       DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false")
+      DRI_CONF_FORCE_GLSL_VERSION(0)
       DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false")
       DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
       DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION("false")
       DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false")
+      DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
 
       DRI_CONF_OPT_BEGIN_B(shader_precompile, "true")
         DRI_CONF_DESC(en, "Perform code generation at shader link time.")
@@ -121,39 +123,6 @@ get_time(void)
    return tp.tv_sec + tp.tv_nsec / 1000000000.0;
 }
 
-void
-aub_dump_bmp(struct gl_context *ctx)
-{
-   struct gl_framebuffer *fb = ctx->DrawBuffer;
-
-   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
-      struct intel_renderbuffer *irb =
-        intel_renderbuffer(fb->_ColorDrawBuffers[i]);
-
-      if (irb && irb->mt) {
-        enum aub_dump_bmp_format format;
-
-        switch (irb->Base.Base.Format) {
-        case MESA_FORMAT_B8G8R8A8_UNORM:
-        case MESA_FORMAT_B8G8R8X8_UNORM:
-           format = AUB_DUMP_BMP_FORMAT_ARGB_8888;
-           break;
-        default:
-           continue;
-        }
-
-         drm_intel_gem_bo_aub_dump_bmp(irb->mt->bo,
-                                      irb->draw_x,
-                                      irb->draw_y,
-                                      irb->Base.Base.Width,
-                                      irb->Base.Base.Height,
-                                      format,
-                                      irb->mt->pitch,
-                                      0);
-      }
-   }
-}
-
 static const __DRItexBufferExtension intelTexBufferExtension = {
    .base = { __DRI_TEX_BUFFER, 3 },
 
@@ -186,10 +155,6 @@ intel_dri2_flush_with_flags(__DRIcontext *cPriv,
       brw->need_flush_throttle = true;
 
    intel_batchbuffer_flush(brw);
-
-   if (INTEL_DEBUG & DEBUG_AUB) {
-      aub_dump_bmp(ctx);
-   }
 }
 
 /**
@@ -230,15 +195,24 @@ static struct intel_image_format intel_image_formats[] = {
    { __DRI_IMAGE_FOURCC_XBGR8888, __DRI_IMAGE_COMPONENTS_RGB, 1,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR8888, 4 }, } },
 
+   { __DRI_IMAGE_FOURCC_ARGB1555, __DRI_IMAGE_COMPONENTS_RGBA, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB1555, 2 } } },
+
    { __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } },
 
    { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },
 
+   { __DRI_IMAGE_FOURCC_R16, __DRI_IMAGE_COMPONENTS_R, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 1 }, } },
+
    { __DRI_IMAGE_FOURCC_GR88, __DRI_IMAGE_COMPONENTS_RG, 1,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } },
 
+   { __DRI_IMAGE_FOURCC_GR1616, __DRI_IMAGE_COMPONENTS_RG, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR1616, 2 }, } },
+
    { __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
        { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
@@ -403,7 +377,7 @@ intel_create_image_from_name(__DRIscreen *dri_screen,
                             int width, int height, int format,
                             int name, int pitch, void *loaderPrivate)
 {
-    struct intel_screen *intelScreen = dri_screen->driverPrivate;
+    struct intel_screen *screen = dri_screen->driverPrivate;
     __DRIimage *image;
     int cpp;
 
@@ -419,7 +393,7 @@ intel_create_image_from_name(__DRIscreen *dri_screen,
     image->width = width;
     image->height = height;
     image->pitch = pitch * cpp;
-    image->bo = drm_intel_bo_gem_create_from_name(intelScreen->bufmgr, "image",
+    image->bo = drm_intel_bo_gem_create_from_name(screen->bufmgr, "image",
                                                   name);
     if (!image->bo) {
        free(image);
@@ -542,7 +516,7 @@ intel_create_image(__DRIscreen *dri_screen,
                   void *loaderPrivate)
 {
    __DRIimage *image;
-   struct intel_screen *intelScreen = dri_screen->driverPrivate;
+   struct intel_screen *screen = dri_screen->driverPrivate;
    uint32_t tiling;
    int cpp;
    unsigned long pitch;
@@ -562,7 +536,7 @@ intel_create_image(__DRIscreen *dri_screen,
       return NULL;
 
    cpp = _mesa_get_format_bytes(image->format);
-   image->bo = drm_intel_bo_alloc_tiled(intelScreen->bufmgr, "image",
+   image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, "image",
                                         width, height, cpp, &tiling,
                                         &pitch, 0);
    if (image->bo == NULL) {
@@ -609,6 +583,9 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
    case __DRI_IMAGE_ATTRIB_NUM_PLANES:
       *value = 1;
       return true;
+   case __DRI_IMAGE_ATTRIB_OFFSET:
+      *value = image->offset;
+      return true;
 
   default:
       return false;
@@ -698,7 +675,7 @@ intel_create_image_from_fds(__DRIscreen *dri_screen,
                             int *fds, int num_fds, int *strides, int *offsets,
                             void *loaderPrivate)
 {
-   struct intel_screen *intelScreen = dri_screen->driverPrivate;
+   struct intel_screen *screen = dri_screen->driverPrivate;
    struct intel_image_format *f;
    __DRIimage *image;
    int i, index;
@@ -740,7 +717,7 @@ intel_create_image_from_fds(__DRIscreen *dri_screen,
          size = end;
    }
 
-   image->bo = drm_intel_bo_gem_create_from_prime(intelScreen->bufmgr,
+   image->bo = drm_intel_bo_gem_create_from_prime(screen->bufmgr,
                                                   fds[0], size);
    if (image->bo == NULL) {
       free(image);
@@ -845,7 +822,7 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
 }
 
 static const __DRIimageExtension intelImageExtension = {
-    .base = { __DRI_IMAGE, 11 },
+    .base = { __DRI_IMAGE, 13 },
 
     .createImageFromName                = intel_create_image_from_name,
     .createImageFromRenderbuffer        = intel_create_image_from_renderbuffer,
@@ -860,14 +837,16 @@ static const __DRIimageExtension intelImageExtension = {
     .createImageFromFds                 = intel_create_image_from_fds,
     .createImageFromDmaBufs             = intel_create_image_from_dma_bufs,
     .blitImage                          = NULL,
-    .getCapabilities                    = NULL
+    .getCapabilities                    = NULL,
+    .mapImage                           = NULL,
+    .unmapImage                         = NULL,
 };
 
 static int
 brw_query_renderer_integer(__DRIscreen *dri_screen,
                            int param, unsigned int *value)
 {
-   const struct intel_screen *const intelScreen =
+   const struct intel_screen *const screen =
       (struct intel_screen *) dri_screen->driverPrivate;
 
    switch (param) {
@@ -875,7 +854,7 @@ brw_query_renderer_integer(__DRIscreen *dri_screen,
       value[0] = 0x8086;
       return 0;
    case __DRI2_RENDERER_DEVICE_ID:
-      value[0] = intelScreen->deviceID;
+      value[0] = screen->deviceID;
       return 0;
    case __DRI2_RENDERER_ACCELERATED:
       value[0] = 1;
@@ -911,6 +890,9 @@ brw_query_renderer_integer(__DRIscreen *dri_screen,
    case __DRI2_RENDERER_UNIFIED_MEMORY_ARCHITECTURE:
       value[0] = 1;
       return 0;
+   case __DRI2_RENDERER_HAS_TEXTURE_3D:
+      value[0] = 1;
+      return 0;
    default:
       return driQueryRendererIntegerCommon(dri_screen, param, value);
    }
@@ -922,7 +904,7 @@ static int
 brw_query_renderer_string(__DRIscreen *dri_screen,
                           int param, const char **value)
 {
-   const struct intel_screen *intelScreen =
+   const struct intel_screen *screen =
       (struct intel_screen *) dri_screen->driverPrivate;
 
    switch (param) {
@@ -930,7 +912,7 @@ brw_query_renderer_string(__DRIscreen *dri_screen,
       value[0] = brw_vendor_string;
       return 0;
    case __DRI2_RENDERER_DEVICE_ID:
-      value[0] = brw_get_renderer_string(intelScreen);
+      value[0] = brw_get_renderer_string(screen);
       return 0;
    default:
       break;
@@ -950,7 +932,7 @@ static const __DRIrobustnessExtension dri2Robustness = {
    .base = { __DRI2_ROBUSTNESS, 1 }
 };
 
-static const __DRIextension *intelScreenExtensions[] = {
+static const __DRIextension *screenExtensions[] = {
     &intelTexBufferExtension.base,
     &intelFenceExtension.base,
     &intelFlushExtension.base,
@@ -1011,12 +993,12 @@ intel_get_integer(struct intel_screen *screen, int param)
 static void
 intelDestroyScreen(__DRIscreen * sPriv)
 {
-   struct intel_screen *intelScreen = sPriv->driverPrivate;
+   struct intel_screen *screen = sPriv->driverPrivate;
 
-   dri_bufmgr_destroy(intelScreen->bufmgr);
-   driDestroyOptionInfo(&intelScreen->optionCache);
+   dri_bufmgr_destroy(screen->bufmgr);
+   driDestroyOptionInfo(&screen->optionCache);
 
-   ralloc_free(intelScreen);
+   ralloc_free(screen);
    sPriv->driverPrivate = NULL;
 }
 
@@ -1083,7 +1065,7 @@ intelCreateBuffer(__DRIscreen *dri_screen,
    if (mesaVis->depthBits == 24) {
       assert(mesaVis->stencilBits == 8);
 
-      if (screen->devinfo->has_hiz_and_separate_stencil) {
+      if (screen->devinfo.has_hiz_and_separate_stencil) {
          rb = intel_create_private_renderbuffer(MESA_FORMAT_Z24_UNORM_X8_UINT,
                                                 num_samples);
          _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
@@ -1134,21 +1116,21 @@ intelDestroyBuffer(__DRIdrawable * driDrawPriv)
 }
 
 static void
-intel_detect_sseu(struct intel_screen *intelScreen)
+intel_detect_sseu(struct intel_screen *screen)
 {
-   assert(intelScreen->devinfo->gen >= 8);
+   assert(screen->devinfo.gen >= 8);
    int ret;
 
-   intelScreen->subslice_total = -1;
-   intelScreen->eu_total = -1;
+   screen->subslice_total = -1;
+   screen->eu_total = -1;
 
-   ret = intel_get_param(intelScreen, I915_PARAM_SUBSLICE_TOTAL,
-                         &intelScreen->subslice_total);
+   ret = intel_get_param(screen, I915_PARAM_SUBSLICE_TOTAL,
+                         &screen->subslice_total);
    if (ret < 0 && ret != -EINVAL)
       goto err_out;
 
-   ret = intel_get_param(intelScreen,
-                         I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
+   ret = intel_get_param(screen,
+                         I915_PARAM_EU_TOTAL, &screen->eu_total);
    if (ret < 0 && ret != -EINVAL)
       goto err_out;
 
@@ -1156,35 +1138,35 @@ intel_detect_sseu(struct intel_screen *intelScreen)
     * and we have to use conservative numbers for GPGPU on many platforms, but
     * otherwise, things will just work.
     */
-   if (intelScreen->subslice_total < 1 || intelScreen->eu_total < 1)
+   if (screen->subslice_total < 1 || screen->eu_total < 1)
       _mesa_warning(NULL,
                     "Kernel 4.1 required to properly query GPU properties.\n");
 
    return;
 
 err_out:
-   intelScreen->subslice_total = -1;
-   intelScreen->eu_total = -1;
+   screen->subslice_total = -1;
+   screen->eu_total = -1;
    _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(-ret));
 }
 
 static bool
-intel_init_bufmgr(struct intel_screen *intelScreen)
+intel_init_bufmgr(struct intel_screen *screen)
 {
-   __DRIscreen *dri_screen = intelScreen->driScrnPriv;
+   __DRIscreen *dri_screen = screen->driScrnPriv;
 
-   intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
+   screen->no_hw = getenv("INTEL_NO_HW") != NULL;
 
-   intelScreen->bufmgr = intel_bufmgr_gem_init(dri_screen->fd, BATCH_SZ);
-   if (intelScreen->bufmgr == NULL) {
+   screen->bufmgr = intel_bufmgr_gem_init(dri_screen->fd, BATCH_SZ);
+   if (screen->bufmgr == NULL) {
       fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
              __func__, __LINE__);
       return false;
    }
 
-   drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
+   drm_intel_bufmgr_gem_enable_fenced_relocs(screen->bufmgr);
 
-   if (!intel_get_boolean(intelScreen, I915_PARAM_HAS_RELAXED_DELTA)) {
+   if (!intel_get_boolean(screen, I915_PARAM_HAS_RELAXED_DELTA)) {
       fprintf(stderr, "[%s: %u] Kernel 2.6.39 required.\n", __func__, __LINE__);
       return false;
    }
@@ -1260,6 +1242,96 @@ intel_detect_timestamp(struct intel_screen *screen)
    return 0;
 }
 
+ /**
+ * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer.
+ *
+ * Some combinations of hardware and kernel versions allow this feature,
+ * while others don't.  Instead of trying to enumerate every case, just
+ * try and write a register and see if works.
+ */
+static bool
+intel_detect_pipelined_register(struct intel_screen *screen,
+                                int reg, uint32_t expected_value, bool reset)
+{
+   drm_intel_bo *results, *bo;
+   uint32_t *batch;
+   uint32_t offset = 0;
+   bool success = false;
+
+   /* Create a zero'ed temporary buffer for reading our results */
+   results = drm_intel_bo_alloc(screen->bufmgr, "registers", 4096, 0);
+   if (results == NULL)
+      goto err;
+
+   bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
+   if (bo == NULL)
+      goto err_results;
+
+   if (drm_intel_bo_map(bo, 1))
+      goto err_batch;
+
+   batch = bo->virtual;
+
+   /* Write the register. */
+   *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
+   *batch++ = reg;
+   *batch++ = expected_value;
+
+   /* Save the register's value back to the buffer. */
+   *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
+   *batch++ = reg;
+   drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)bo->virtual,
+                           results, offset*sizeof(uint32_t),
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           I915_GEM_DOMAIN_INSTRUCTION);
+   *batch++ = results->offset + offset*sizeof(uint32_t);
+
+   /* And afterwards clear the register */
+   if (reset) {
+      *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
+      *batch++ = reg;
+      *batch++ = 0;
+   }
+
+   *batch++ = MI_BATCH_BUFFER_END;
+
+   drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8),
+                         NULL, 0, 0,
+                         I915_EXEC_RENDER);
+
+   /* Check whether the value got written. */
+   if (drm_intel_bo_map(results, false) == 0) {
+      success = *((uint32_t *)results->virtual + offset) == expected_value;
+      drm_intel_bo_unmap(results);
+   }
+
+err_batch:
+   drm_intel_bo_unreference(bo);
+err_results:
+   drm_intel_bo_unreference(results);
+err:
+   return success;
+}
+
+static bool
+intel_detect_pipelined_so(struct intel_screen *screen)
+{
+   /* Supposedly, Broadwell just works. */
+   if (screen->devinfo.gen >= 8)
+      return true;
+
+   if (screen->devinfo.gen <= 6)
+      return false;
+
+   /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the
+    * statistics registers), and we already reset it to zero before using it.
+    */
+   return intel_detect_pipelined_register(screen,
+                                          GEN7_SO_WRITE_OFFSET(0),
+                                          0x1337d0d0,
+                                          false);
+}
+
 /**
  * Return array of MSAA modes supported by the hardware. The array is
  * zero-terminated and sorted in decreasing order.
@@ -1273,13 +1345,13 @@ intel_supported_msaa_modes(const struct intel_screen  *screen)
    static const int gen6_modes[] = {4, 0, -1};
    static const int gen4_modes[] = {0, -1};
 
-   if (screen->devinfo->gen >= 9) {
+   if (screen->devinfo.gen >= 9) {
       return gen9_modes;
-   } else if (screen->devinfo->gen >= 8) {
+   } else if (screen->devinfo.gen >= 8) {
       return gen8_modes;
-   } else if (screen->devinfo->gen >= 7) {
+   } else if (screen->devinfo.gen >= 7) {
       return gen7_modes;
-   } else if (screen->devinfo->gen == 6) {
+   } else if (screen->devinfo.gen == 6) {
       return gen6_modes;
    } else {
       return gen4_modes;
@@ -1304,7 +1376,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
    static const uint8_t multisample_samples[2]  = {4, 8};
 
    struct intel_screen *screen = dri_screen->driverPrivate;
-   const struct gen_device_info *devinfo = screen->devinfo;
+   const struct gen_device_info *devinfo = &screen->devinfo;
    uint8_t depth_bits[4], stencil_bits[4];
    __DRIconfig **configs = NULL;
 
@@ -1426,20 +1498,22 @@ static void
 set_max_gl_versions(struct intel_screen *screen)
 {
    __DRIscreen *dri_screen = screen->driScrnPriv;
+   const bool has_astc = screen->devinfo.gen >= 9;
 
-   switch (screen->devinfo->gen) {
+   switch (screen->devinfo.gen) {
    case 9:
    case 8:
-      dri_screen->max_gl_core_version = 44;
+      dri_screen->max_gl_core_version = 45;
       dri_screen->max_gl_compat_version = 30;
       dri_screen->max_gl_es1_version = 11;
-      dri_screen->max_gl_es2_version = 31;
+      dri_screen->max_gl_es2_version = has_astc ? 32 : 31;
       break;
    case 7:
-      dri_screen->max_gl_core_version = 33;
+      dri_screen->max_gl_core_version = screen->devinfo.is_haswell &&
+         can_do_pipelined_register_writes(screen) ? 45 : 33;
       dri_screen->max_gl_compat_version = 30;
       dri_screen->max_gl_es1_version = 11;
-      dri_screen->max_gl_es2_version = screen->devinfo->is_haswell ? 31 : 30;
+      dri_screen->max_gl_es2_version = screen->devinfo.is_haswell ? 31 : 30;
       break;
    case 6:
       dri_screen->max_gl_core_version = 33;
@@ -1486,11 +1560,6 @@ brw_get_revision(int fd)
    return revision;
 }
 
-/* Drop when RS headers get pulled to libdrm */
-#ifndef I915_PARAM_HAS_RESOURCE_STREAMER
-#define I915_PARAM_HAS_RESOURCE_STREAMER 36
-#endif
-
 static void
 shader_debug_log_mesa(void *data, const char *fmt, ...)
 {
@@ -1540,7 +1609,7 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
 static const
 __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
 {
-   struct intel_screen *intelScreen;
+   struct intel_screen *screen;
 
    if (dri_screen->image.loader) {
    } else if (dri_screen->dri2.loader->base.version <= 2 ||
@@ -1548,47 +1617,42 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
       fprintf(stderr,
              "\nERROR!  DRI2 loader with getBuffersWithFormat() "
              "support required\n");
-      return false;
+      return NULL;
    }
 
    /* Allocate the private area */
-   intelScreen = rzalloc(NULL, struct intel_screen);
-   if (!intelScreen) {
+   screen = rzalloc(NULL, struct intel_screen);
+   if (!screen) {
       fprintf(stderr, "\nERROR!  Allocating private area failed\n");
-      return false;
+      return NULL;
    }
    /* parse information in __driConfigOptions */
-   driParseOptionInfo(&intelScreen->optionCache, brw_config_options.xml);
+   driParseOptionInfo(&screen->optionCache, brw_config_options.xml);
 
-   intelScreen->driScrnPriv = dri_screen;
-   dri_screen->driverPrivate = (void *) intelScreen;
+   screen->driScrnPriv = dri_screen;
+   dri_screen->driverPrivate = (void *) screen;
 
-   if (!intel_init_bufmgr(intelScreen))
-       return false;
+   if (!intel_init_bufmgr(screen))
+       return NULL;
 
-   intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
-   intelScreen->devinfo = gen_get_device_info(intelScreen->deviceID);
-   if (!intelScreen->devinfo)
-      return false;
+   screen->deviceID = drm_intel_bufmgr_gem_get_devid(screen->bufmgr);
+   if (!gen_get_device_info(screen->deviceID, &screen->devinfo))
+      return NULL;
+
+   const struct gen_device_info *devinfo = &screen->devinfo;
 
    brw_process_intel_debug_variable();
 
    if (INTEL_DEBUG & DEBUG_BUFMGR)
-      dri_bufmgr_set_debug(intelScreen->bufmgr, true);
+      dri_bufmgr_set_debug(screen->bufmgr, true);
 
-   if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && intelScreen->devinfo->gen < 7) {
+   if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && devinfo->gen < 7) {
       fprintf(stderr,
               "shader_time debugging requires gen7 (Ivybridge) or better.\n");
       INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
    }
 
-   if (INTEL_DEBUG & DEBUG_AUB)
-      drm_intel_bufmgr_gem_set_aub_dump(intelScreen->bufmgr, true);
-
-#ifndef I915_PARAM_MMAP_GTT_VERSION
-#define I915_PARAM_MMAP_GTT_VERSION 40 /* XXX delete me with new libdrm */
-#endif
-   if (intel_get_integer(intelScreen, I915_PARAM_MMAP_GTT_VERSION) >= 1) {
+   if (intel_get_integer(screen, I915_PARAM_MMAP_GTT_VERSION) >= 1) {
       /* Theorectically unlimited! At least for individual objects...
        *
        * Currently the entire (global) address space for all GTT maps is
@@ -1604,7 +1668,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
        * objects of the current maximum allocable size before running out
        * of mmap space.
        */
-      intelScreen->max_gtt_map_object_size = UINT64_MAX;
+      screen->max_gtt_map_object_size = UINT64_MAX;
    } else {
       /* Estimate the size of the mappable aperture into the GTT.  There's an
        * ioctl to get the whole GTT size, but not one to get the mappable subset.
@@ -1619,30 +1683,33 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
        * taken up by things like the framebuffer and the ringbuffer and such, so
        * be more conservative.
        */
-      intelScreen->max_gtt_map_object_size = gtt_size / 4;
+      screen->max_gtt_map_object_size = gtt_size / 4;
    }
 
-   intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
-   intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
+   screen->hw_has_swizzling = intel_detect_swizzling(screen);
+   screen->hw_has_timestamp = intel_detect_timestamp(screen);
 
    /* GENs prior to 8 do not support EU/Subslice info */
-   if (intelScreen->devinfo->gen >= 8) {
-      intel_detect_sseu(intelScreen);
-   } else if (intelScreen->devinfo->gen == 7) {
-      intelScreen->subslice_total = 1 << (intelScreen->devinfo->gt - 1);
+   if (devinfo->gen >= 8) {
+      intel_detect_sseu(screen);
+   } else if (devinfo->gen == 7) {
+      screen->subslice_total = 1 << (devinfo->gt - 1);
    }
 
+   if (intel_detect_pipelined_so(screen))
+      screen->kernel_features |= KERNEL_ALLOWS_SOL_OFFSET_WRITES;
+
    const char *force_msaa = getenv("INTEL_FORCE_MSAA");
    if (force_msaa) {
-      intelScreen->winsys_msaa_samples_override =
-         intel_quantize_num_samples(intelScreen, atoi(force_msaa));
+      screen->winsys_msaa_samples_override =
+         intel_quantize_num_samples(screen, atoi(force_msaa));
       printf("Forcing winsys sample count to %d\n",
-             intelScreen->winsys_msaa_samples_override);
+             screen->winsys_msaa_samples_override);
    } else {
-      intelScreen->winsys_msaa_samples_override = -1;
+      screen->winsys_msaa_samples_override = -1;
    }
 
-   set_max_gl_versions(intelScreen);
+   set_max_gl_versions(screen);
 
    /* Notification of GPU resets requires hardware contexts and a kernel new
     * enough to support DRM_IOCTL_I915_GET_RESET_STATS.  If the ioctl is
@@ -1653,43 +1720,56 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
     *
     * Don't even try on pre-Gen6, since we don't attempt to use contexts there.
     */
-   if (intelScreen->devinfo->gen >= 6) {
+   if (devinfo->gen >= 6) {
       struct drm_i915_reset_stats stats;
       memset(&stats, 0, sizeof(stats));
 
       const int ret = drmIoctl(dri_screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
 
-      intelScreen->has_context_reset_notification =
+      screen->has_context_reset_notification =
          (ret != -1 || errno != EINVAL);
    }
 
-   if (intel_get_param(intelScreen, I915_PARAM_CMD_PARSER_VERSION,
-                       &intelScreen->cmd_parser_version) < 0) {
-      intelScreen->cmd_parser_version = 0;
+   if (intel_get_param(screen, I915_PARAM_CMD_PARSER_VERSION,
+                       &screen->cmd_parser_version) < 0) {
+      screen->cmd_parser_version = 0;
+   }
+
+   if (devinfo->gen >= 8 || screen->cmd_parser_version >= 2)
+      screen->kernel_features |= KERNEL_ALLOWS_PREDICATE_WRITES;
+
+   /* Haswell requires command parser version 4 in order to have L3
+    * atomic scratch1 and chicken3 bits
+    */
+   if (devinfo->is_haswell && screen->cmd_parser_version >= 4) {
+      screen->kernel_features |=
+         KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3;
    }
 
    /* Haswell requires command parser version 6 in order to write to the
     * MI_MATH GPR registers, and version 7 in order to use
     * MI_LOAD_REGISTER_REG (which all users of MI_MATH use).
     */
-   intelScreen->has_mi_math_and_lrr = intelScreen->devinfo->gen >= 8 ||
-                                      (intelScreen->devinfo->is_haswell &&
-                                       intelScreen->cmd_parser_version >= 7);
-
-   dri_screen->extensions = !intelScreen->has_context_reset_notification
-      ? intelScreenExtensions : intelRobustScreenExtensions;
-
-   intelScreen->compiler = brw_compiler_create(intelScreen,
-                                               intelScreen->devinfo);
-   intelScreen->compiler->shader_debug_log = shader_debug_log_mesa;
-   intelScreen->compiler->shader_perf_log = shader_perf_log_mesa;
-   intelScreen->program_id = 1;
-
-   if (intelScreen->devinfo->has_resource_streamer) {
-      intelScreen->has_resource_streamer =
-        intel_get_boolean(intelScreen, I915_PARAM_HAS_RESOURCE_STREAMER);
+   if (devinfo->gen >= 8 ||
+       (devinfo->is_haswell && screen->cmd_parser_version >= 7)) {
+      screen->kernel_features |= KERNEL_ALLOWS_MI_MATH_AND_LRR;
    }
 
+   /* Gen7 needs at least command parser version 5 to support compute */
+   if (devinfo->gen >= 8 || screen->cmd_parser_version >= 5)
+      screen->kernel_features |= KERNEL_ALLOWS_COMPUTE_DISPATCH;
+
+   dri_screen->extensions = !screen->has_context_reset_notification
+      ? screenExtensions : intelRobustScreenExtensions;
+
+   screen->compiler = brw_compiler_create(screen, devinfo);
+   screen->compiler->shader_debug_log = shader_debug_log_mesa;
+   screen->compiler->shader_perf_log = shader_perf_log_mesa;
+   screen->program_id = 1;
+
+   screen->has_exec_fence =
+     intel_get_boolean(screen, I915_PARAM_HAS_EXEC_FENCE);
+
    return (const __DRIconfig**) intel_screen_make_configs(dri_screen);
 }
 
@@ -1704,7 +1784,7 @@ intelAllocateBuffer(__DRIscreen *dri_screen,
                    int width, int height)
 {
    struct intel_buffer *intelBuffer;
-   struct intel_screen *intelScreen = dri_screen->driverPrivate;
+   struct intel_screen *screen = dri_screen->driverPrivate;
 
    assert(attachment == __DRI_BUFFER_FRONT_LEFT ||
           attachment == __DRI_BUFFER_BACK_LEFT);
@@ -1717,7 +1797,7 @@ intelAllocateBuffer(__DRIscreen *dri_screen,
    uint32_t tiling = I915_TILING_X;
    unsigned long pitch;
    int cpp = format / 8;
-   intelBuffer->bo = drm_intel_bo_alloc_tiled(intelScreen->bufmgr,
+   intelBuffer->bo = drm_intel_bo_alloc_tiled(screen->bufmgr,
                                               "intelAllocateBuffer",
                                               width,
                                               height,