driconfig: add a new engine name/version parameter

[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c

index 9d108fe31d894e39c9386cbf2316cdfde49170c1..ac84864f521dbac5a99966f8513e4ad53ab46c87 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -36,6 +36,7 @@
  #include "main/context.h"
  #include "main/fbobject.h"
  #include "main/extensions.h"
+#include "main/glthread.h"
  #include "main/imports.h"
  #include "main/macros.h"
  #include "main/points.h"
@@ -45,8 +46,9 @@
  #include "main/framebuffer.h"
  #include "main/stencil.h"
  #include "main/state.h"
+#include "main/spirv_extensions.h"
  
-#include "vbo/vbo_context.h"
+#include "vbo/vbo.h"
  
  #include "drivers/common/driverfuncs.h"
  #include "drivers/common/meta.h"
@@ -73,8 +75,12 @@
  #include "tnl/t_pipeline.h"
  #include "util/ralloc.h"
  #include "util/debug.h"
+#include "util/disk_cache.h"
  #include "isl/isl.h"
  
+#include "common/gen_defines.h"
+
+#include "compiler/spirv/nir_spirv.h"
  /***************************************
   * Mesa's Driver Functions
   ***************************************/
@@ -143,6 +149,24 @@ intel_get_string(struct gl_context * ctx, GLenum name)
     }
  }
  
+static void
+brw_set_background_context(struct gl_context *ctx,
+                           struct util_queue_monitoring *queue_info)
+{
+   struct brw_context *brw = brw_context(ctx);
+   __DRIcontext *driContext = brw->driContext;
+   __DRIscreen *driScreen = driContext->driScreenPriv;
+   const __DRIbackgroundCallableExtension *backgroundCallable =
+      driScreen->dri2.backgroundCallable;
+
+   /* Forward the loader's private pointer to its setBackgroundContext hook
+    * (queue_info is unused).  Mesa only calls this if multithreading was
+    * enabled (_mesa_enable_multithreading()), which we only do when the
+    * loader exposed __DRI_BACKGROUND_CALLABLE: backgroundCallable != NULL.
+    */
+   backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
+}
+
  static void
  intel_viewport(struct gl_context *ctx)
  {
@@ -194,8 +218,6 @@ intel_update_state(struct gl_context * ctx)
     if (new_state & _NEW_POLYGON)
        brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
  
-   intel_prepare_render(brw);
-
     if (new_state & _NEW_BUFFERS) {
        intel_update_framebuffer(ctx, ctx->DrawBuffer);
        if (ctx->DrawBuffer != ctx->ReadBuffer)
@@ -237,6 +259,35 @@ intel_flush_front(struct gl_context *ctx)
     }
  }
  
+static void
+brw_display_shared_buffer(struct brw_context *brw)
+{
+   __DRIcontext *dri_context = brw->driContext;
+   __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
+   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
+   int fence_fd = -1; /* stays -1 when has_exec_fence is false */
+
+   if (!brw->is_shared_buffer_bound) /* only a bound, dirty buffer is shown */
+      return;
+
+   if (!brw->is_shared_buffer_dirty)
+      return;
+
+   if (brw->screen->has_exec_fence) {
+      /* This function is always called during a flush operation, so there is
+       * no need to flush again here. But we want to provide a fence_fd to the
+       * loader, and a redundant flush is the easiest way to acquire one.
+       */
+      if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
+         return; /* nonzero result: skip display, dirty flag stays set */
+   }
+
+   dri_screen->mutableRenderBuffer.loader
+      ->displaySharedBuffer(dri_drawable, fence_fd,
+                            dri_drawable->loaderPrivate);
+   brw->is_shared_buffer_dirty = false; /* buffer was passed to the loader */
+}
+
  static void
  intel_glFlush(struct gl_context *ctx)
  {
@@ -244,7 +295,7 @@ intel_glFlush(struct gl_context *ctx)
  
     intel_batchbuffer_flush(brw);
     intel_flush_front(ctx);
-
+   brw_display_shared_buffer(brw);
     brw->need_flush_throttle = true;
  }
  
@@ -263,6 +314,8 @@ static void
  brw_init_driver_functions(struct brw_context *brw,
                            struct dd_function_table *functions)
  {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
     _mesa_init_driver_functions(functions);
  
     /* GLX uses DRI2 invalidate events to handle window resizing.
@@ -280,9 +333,9 @@ brw_init_driver_functions(struct brw_context *brw,
     functions->GetString = intel_get_string;
     functions->UpdateState = intel_update_state;
  
+   brw_init_draw_functions(functions);
     intelInitTextureFuncs(functions);
     intelInitTextureImageFuncs(functions);
-   intelInitTextureSubImageFuncs(functions);
     intelInitTextureCopyImageFuncs(functions);
     intelInitCopyImageFuncs(functions);
     intelInitClearFuncs(functions);
@@ -294,15 +347,17 @@ brw_init_driver_functions(struct brw_context *brw,
  
     brwInitFragProgFuncs( functions );
     brw_init_common_queryobj_functions(functions);
-   if (brw->gen >= 8 || brw->is_haswell)
+   if (devinfo->gen >= 8 || devinfo->is_haswell)
        hsw_init_queryobj_functions(functions);
-   else if (brw->gen >= 6)
+   else if (devinfo->gen >= 6)
        gen6_init_queryobj_functions(functions);
     else
        gen4_init_queryobj_functions(functions);
     brw_init_compute_functions(functions);
     brw_init_conditional_render_functions(functions);
  
+   functions->GenerateMipmap = brw_generate_mipmap;
+
     functions->QueryInternalFormat = brw_query_internal_format;
  
     functions->NewTransformFeedback = brw_new_transform_feedback;
@@ -312,7 +367,7 @@ brw_init_driver_functions(struct brw_context *brw,
        functions->EndTransformFeedback = hsw_end_transform_feedback;
        functions->PauseTransformFeedback = hsw_pause_transform_feedback;
        functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
-   } else if (brw->gen >= 7) {
+   } else if (devinfo->gen >= 7) {
        functions->BeginTransformFeedback = gen7_begin_transform_feedback;
        functions->EndTransformFeedback = gen7_end_transform_feedback;
        functions->PauseTransformFeedback = gen7_pause_transform_feedback;
@@ -328,28 +383,64 @@ brw_init_driver_functions(struct brw_context *brw,
           brw_get_transform_feedback_vertex_count;
     }
  
-   if (brw->gen >= 6)
+   if (devinfo->gen >= 6)
        functions->GetSamplePosition = gen6_get_sample_position;
+
+   /* GL_ARB_get_program_binary */
+   brw_program_binary_init(brw->screen->deviceID);
+   functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
+   functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
+   functions->ProgramBinaryDeserializeDriverBlob =
+      brw_deserialize_program_binary;
+
+   if (brw->screen->disk_cache) {
+      functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
+   }
+
+   functions->SetBackgroundContext = brw_set_background_context;
+}
+
+static void
+brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   struct gl_context *ctx = &brw->ctx;
+
+   /* The following SPIR-V capabilities are only supported on gen7+. In theory
+    * you should enable the extension only on gen7+, but just in case let's
+    * assert it.  The "gen >= 7" values below restate that requirement.
+    */
+   assert(devinfo->gen >= 7);
+
+   ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
+   ctx->Const.SpirVCapabilities.draw_parameters = true;
+   ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8; /* gen8+ only */
+   ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
+   ctx->Const.SpirVCapabilities.image_write_without_format = true;
+   ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8; /* gen8+ only */
+   ctx->Const.SpirVCapabilities.tessellation = true;
+   ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
+   ctx->Const.SpirVCapabilities.variable_pointers = true;
  }
  
  static void
  brw_initialize_context_constants(struct brw_context *brw)
  {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
     struct gl_context *ctx = &brw->ctx;
     const struct brw_compiler *compiler = brw->screen->compiler;
  
     const bool stage_exists[MESA_SHADER_STAGES] = {
        [MESA_SHADER_VERTEX] = true,
-      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
-      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
-      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
+      [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
+      [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
+      [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
        [MESA_SHADER_FRAGMENT] = true,
        [MESA_SHADER_COMPUTE] =
-         ((ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGL_CORE) &&
+         (_mesa_is_desktop_gl(ctx) &&
            ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
           (ctx->API == API_OPENGLES2 &&
-          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
-         _mesa_extension_override_enables.ARB_compute_shader,
+          ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
     };
  
     unsigned num_stages = 0;
@@ -359,7 +450,7 @@ brw_initialize_context_constants(struct brw_context *brw)
     }
  
     unsigned max_samplers =
-      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
+      devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;
  
     ctx->Const.MaxDualSourceDrawBuffers = 1;
     ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
@@ -385,27 +476,27 @@ brw_initialize_context_constants(struct brw_context *brw)
  
     ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
     ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
-   if (brw->gen >= 7) {
+   if (devinfo->gen >= 7) {
        ctx->Const.MaxRenderbufferSize = 16384;
-      ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
+      ctx->Const.MaxTextureSize = 16384;
        ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
     } else {
        ctx->Const.MaxRenderbufferSize = 8192;
-      ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+      ctx->Const.MaxTextureSize = 8192;
        ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
     }
     ctx->Const.Max3DTextureLevels = 12; /* 2048 */
-   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
+   ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
     ctx->Const.MaxTextureMbytes = 1536;
-   ctx->Const.MaxTextureRectSize = brw->gen >= 7 ? 16384 : 8192;
+   ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
     ctx->Const.MaxTextureMaxAnisotropy = 16.0;
     ctx->Const.MaxTextureLodBias = 15.0;
     ctx->Const.StripTextureBorder = true;
-   if (brw->gen >= 7) {
+   if (devinfo->gen >= 7) {
        ctx->Const.MaxProgramTextureGatherComponents = 4;
        ctx->Const.MinProgramTextureGatherOffset = -32;
        ctx->Const.MaxProgramTextureGatherOffset = 31;
-   } else if (brw->gen == 6) {
+   } else if (devinfo->gen == 6) {
        ctx->Const.MaxProgramTextureGatherComponents = 1;
        ctx->Const.MinProgramTextureGatherOffset = -8;
        ctx->Const.MaxProgramTextureGatherOffset = 7;
@@ -504,7 +595,7 @@ brw_initialize_context_constants(struct brw_context *brw)
  
     ctx->Const.MinLineWidth = 1.0;
     ctx->Const.MinLineWidthAA = 1.0;
-   if (brw->gen >= 6) {
+   if (devinfo->gen >= 6) {
        ctx->Const.MaxLineWidth = 7.375;
        ctx->Const.MaxLineWidthAA = 7.375;
        ctx->Const.LineWidthGranularity = 0.125;
@@ -527,12 +618,12 @@ brw_initialize_context_constants(struct brw_context *brw)
     ctx->Const.MaxPointSizeAA = 255.0;
     ctx->Const.PointSizeGranularity = 1.0;
  
-   if (brw->gen >= 5 || brw->is_g4x)
+   if (devinfo->gen >= 5 || devinfo->is_g4x)
        ctx->Const.MaxClipPlanes = 8;
  
+   ctx->Const.GLSLFragCoordIsSysVal = true;
+   ctx->Const.GLSLFrontFacingIsSysVal = true;
     ctx->Const.GLSLTessLevelsAsInputs = true;
-   ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8;
-   ctx->Const.LowerTESPatchVerticesIn = true;
     ctx->Const.PrimitiveRestartForPatches = true;
  
     ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
@@ -582,11 +673,10 @@ brw_initialize_context_constants(struct brw_context *brw)
      * that affect provoking vertex decision. Always use last vertex
      * convention for quad primitive which works as expected for now.
      */
-   if (brw->gen >= 6)
+   if (devinfo->gen >= 6)
        ctx->Const.QuadsFollowProvokingVertexConvention = false;
  
     ctx->Const.NativeIntegers = true;
-   ctx->Const.VertexID_is_zero_based = true;
  
     /* Regarding the CMP instruction, the Ivybridge PRM says:
      *
@@ -614,8 +704,11 @@ brw_initialize_context_constants(struct brw_context *brw)
      *      the element in the buffer."
      *
      * However, unaligned accesses are slower, so enforce buffer alignment.
+    *
+    * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
+    * restriction: the start of the buffer needs to be 32B aligned.
      */
-   ctx->Const.UniformBufferOffsetAlignment = 16;
+   ctx->Const.UniformBufferOffsetAlignment = 32;
  
     /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
      * that we can safely have the CPU and GPU writing the same SSBO on
@@ -628,10 +721,11 @@ brw_initialize_context_constants(struct brw_context *brw)
     ctx->Const.TextureBufferOffsetAlignment = 16;
     ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
  
-   if (brw->gen >= 6) {
+   if (devinfo->gen >= 6) {
        ctx->Const.MaxVarying = 32;
        ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
-      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
+      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
+         compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
        ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
        ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
        ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
@@ -646,15 +740,15 @@ brw_initialize_context_constants(struct brw_context *brw)
           brw->screen->compiler->glsl_compiler_options[i];
     }
  
-   if (brw->gen >= 7) {
+   if (devinfo->gen >= 7) {
        ctx->Const.MaxViewportWidth = 32768;
        ctx->Const.MaxViewportHeight = 32768;
     }
  
     /* ARB_viewport_array, OES_viewport_array */
-   if (brw->gen >= 6) {
+   if (devinfo->gen >= 6) {
        ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
-      ctx->Const.ViewportSubpixelBits = 0;
+      ctx->Const.ViewportSubpixelBits = 8;
  
        /* Cast to float before negating because MaxViewportWidth is unsigned.
         */
@@ -663,7 +757,7 @@ brw_initialize_context_constants(struct brw_context *brw)
     }
  
     /* ARB_gpu_shader5 */
-   if (brw->gen >= 7)
+   if (devinfo->gen >= 7)
        ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
  
     /* ARB_framebuffer_no_attachments */
@@ -674,6 +768,28 @@ brw_initialize_context_constants(struct brw_context *brw)
  
     /* OES_primitive_bounding_box */
     ctx->Const.NoPrimitiveBoundingBoxOutput = true;
+
+   /* TODO: We should be able to use STD430 packing by default on all hardware
+    * but some piglit tests [1] currently fail on SNB when this is enabled.
+    * The problem is the messages we're using for doing uniform pulls
+    * in the vec4 back-end on SNB is the OWORD block load instruction, which
+    * takes its offset in units of OWORDS (16 bytes).  On IVB+, we use the
+    * sampler which doesn't have these restrictions.
+    *
+    * In the scalar back-end, we use the sampler for dynamic uniform loads and
+    * pull an entire cache line at a time for constant offset loads both of
+    * which support almost any alignment.
+    *
+    * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
+    */
+   if (devinfo->gen >= 7)
+      ctx->Const.UseSTD430AsDefaultPacking = true;
+
+   if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
+      ctx->Const.AllowMappedBuffersDuringExecution = true;
+
+   /* GL_ARB_get_program_binary */
+   ctx->Const.NumProgramBinaryFormats = 1;
  }
  
  static void
@@ -684,7 +800,7 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
     struct gen_device_info *devinfo = &brw->screen->devinfo;
  
     /* FINISHME: Do this for all platforms that the kernel supports */
-   if (brw->is_cherryview &&
+   if (devinfo->is_cherryview &&
         screen->subslice_total > 0 && screen->eu_total > 0) {
        /* Logical CS threads = EUs per subslice * 7 threads per EU */
        uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
@@ -722,28 +838,24 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
  static void
  brw_process_driconf_options(struct brw_context *brw)
  {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
     struct gl_context *ctx = &brw->ctx;
  
     driOptionCache *options = &brw->optionCache;
     driParseConfigFiles(options, &brw->screen->optionCache,
-                       brw->driContext->driScreenPriv->myNum, "i965");
-
-   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
-   switch (bo_reuse_mode) {
-   case DRI_CONF_BO_REUSE_DISABLED:
-      break;
-   case DRI_CONF_BO_REUSE_ALL:
-      brw_bufmgr_enable_reuse(brw->bufmgr);
-      break;
-   }
+                       brw->driContext->driScreenPriv->myNum,
+                       "i965", NULL, NULL, 0);
  
     if (INTEL_DEBUG & DEBUG_NO_HIZ) {
         brw->has_hiz = false;
         /* On gen6, you can only do separate stencil with HIZ. */
-       if (brw->gen == 6)
+       if (devinfo->gen == 6)
            brw->has_separate_stencil = false;
     }
  
+   if (driQueryOptionb(options, "mesa_no_error"))
+      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
+
     if (driQueryOptionb(options, "always_flush_batch")) {
        fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
        brw->always_flush_batch = true;
@@ -789,18 +901,22 @@ brw_process_driconf_options(struct brw_context *brw)
  
     brw->dual_color_blend_by_location =
        driQueryOptionb(options, "dual_color_blend_by_location");
+
+   ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
+      driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
+
+   ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
+   driComputeOptionsSha1(&brw->screen->optionCache,
+                         ctx->Const.dri_config_options_sha1);
  }
  
  GLboolean
  brwCreateContext(gl_api api,
-                const struct gl_config *mesaVis,
-                __DRIcontext *driContextPriv,
-                 unsigned major_version,
-                 unsigned minor_version,
-                 uint32_t flags,
-                 bool notify_reset,
+                 const struct gl_config *mesaVis,
+                 __DRIcontext *driContextPriv,
+                 const struct __DriverContextConfig *ctx_config,
                   unsigned *dri_ctx_error,
-                void *sharedContextPrivate)
+                 void *sharedContextPrivate)
  {
     struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
     struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
@@ -810,75 +926,68 @@ brwCreateContext(gl_api api,
     /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
      * provides us with context reset notifications.
      */
-   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
-      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
+   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
+                            __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
+                            __DRI_CTX_FLAG_NO_ERROR;
  
     if (screen->has_context_reset_notification)
        allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
  
-   if (flags & ~allowed_flags) {
+   if (ctx_config->flags & ~allowed_flags) {
        *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
        return false;
     }
  
+   if (ctx_config->attribute_mask &
+       ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
+         __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
+      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
+      return false;
+   }
+
+   bool notify_reset =
+      ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
+       ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
+
     struct brw_context *brw = rzalloc(NULL, struct brw_context);
     if (!brw) {
        fprintf(stderr, "%s: failed to alloc context\n", __func__);
        *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
        return false;
     }
+   brw->perf_ctx = gen_perf_new_context(brw);
  
     driContextPriv->driverPrivate = brw;
     brw->driContext = driContextPriv;
     brw->screen = screen;
     brw->bufmgr = screen->bufmgr;
  
-   brw->gen = devinfo->gen;
-   brw->gt = devinfo->gt;
-   brw->is_g4x = devinfo->is_g4x;
-   brw->is_baytrail = devinfo->is_baytrail;
-   brw->is_haswell = devinfo->is_haswell;
-   brw->is_cherryview = devinfo->is_cherryview;
-   brw->is_broxton = devinfo->is_broxton || devinfo->is_geminilake;
-   brw->has_llc = devinfo->has_llc;
     brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
     brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
-   brw->has_pln = devinfo->has_pln;
-   brw->has_compr4 = devinfo->has_compr4;
-   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
-   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
-   brw->needs_unlit_centroid_workaround =
-      devinfo->needs_unlit_centroid_workaround;
-
-   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
+
     brw->has_swizzling = screen->hw_has_swizzling;
  
-   isl_device_init(&brw->isl_dev, devinfo, screen->hw_has_swizzling);
+   brw->isl_dev = screen->isl_dev;
  
     brw->vs.base.stage = MESA_SHADER_VERTEX;
     brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
     brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
     brw->gs.base.stage = MESA_SHADER_GEOMETRY;
     brw->wm.base.stage = MESA_SHADER_FRAGMENT;
-   if (brw->gen >= 8) {
-      gen8_init_vtable_surface_functions(brw);
-      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
-   } else if (brw->gen >= 7) {
-      gen7_init_vtable_surface_functions(brw);
-      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
-   } else if (brw->gen >= 6) {
-      gen6_init_vtable_surface_functions(brw);
-      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
-   } else {
-      gen4_init_vtable_surface_functions(brw);
-      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
-   }
+   brw->cs.base.stage = MESA_SHADER_COMPUTE;
  
     brw_init_driver_functions(brw, &functions);
  
     if (notify_reset)
        functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
  
+   brw_process_driconf_options(brw);
+
+   if (api == API_OPENGL_CORE &&
+       driQueryOptionb(&screen->optionCache, "force_compat_profile")) {
+      api = API_OPENGL_COMPAT;
+   }
+
     struct gl_context *ctx = &brw->ctx;
  
     if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
@@ -888,7 +997,7 @@ brwCreateContext(gl_api api,
        return false;
     }
  
-   driContextSetFlags(ctx, flags);
+   driContextSetFlags(ctx, ctx_config->flags);
  
     /* Initialize the software rasterizer and helper modules.
      *
@@ -913,8 +1022,6 @@ brwCreateContext(gl_api api,
  
     _mesa_meta_init(ctx);
  
-   brw_process_driconf_options(brw);
-
     if (INTEL_DEBUG & DEBUG_PERF)
        brw->perf_debug = true;
  
@@ -929,20 +1036,39 @@ brwCreateContext(gl_api api,
  
     intel_fbo_init(brw);
  
-   intel_batchbuffer_init(&brw->batch, brw->bufmgr, brw->has_llc);
+   intel_batchbuffer_init(brw);
  
-   if (brw->gen >= 6) {
-      /* Create a new hardware context.  Using a hardware context means that
-       * our GPU state will be saved/restored on context switch, allowing us
-       * to assume that the GPU is in the same state we left it in.
-       *
-       * This is required for transform feedback buffer offsets, query objects,
-       * and also allows us to reduce how much state we have to emit.
-       */
-      brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
+   /* Create a new hardware context.  Using a hardware context means that
+    * our GPU state will be saved/restored on context switch, allowing us
+    * to assume that the GPU is in the same state we left it in.
+    *
+    * This is required for transform feedback buffer offsets, query objects,
+    * and also allows us to reduce how much state we have to emit.
+    */
+   brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
+   if (!brw->hw_ctx && devinfo->gen >= 6) {
+      fprintf(stderr, "Failed to create hardware context.\n");
+      intelDestroyContext(driContextPriv);
+      return false;
+   }
  
-      if (!brw->hw_ctx) {
-         fprintf(stderr, "Failed to create hardware context.\n");
+   if (brw->hw_ctx) {
+      int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
+      if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
+         switch (ctx_config->priority) {
+         case __DRI_CTX_PRIORITY_LOW:
+            hw_priority = GEN_CONTEXT_LOW_PRIORITY;
+            break;
+         case __DRI_CTX_PRIORITY_HIGH:
+            hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
+            break;
+         }
+      }
+      if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
+          brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
+         fprintf(stderr,
+                "Failed to set priority [%d:%d] for hardware context.\n",
+                 ctx_config->priority, hw_priority);
           intelDestroyContext(driContextPriv);
           return false;
        }
@@ -954,6 +1080,8 @@ brwCreateContext(gl_api api,
        return false;
     }
  
+   brw_upload_init(&brw->upload, brw->bufmgr, 65536);
+
     brw_init_state(brw);
  
     intelInitExtensions(ctx);
@@ -964,7 +1092,7 @@ brwCreateContext(gl_api api,
  
     brw->urb.size = devinfo->urb.size;
  
-   if (brw->gen == 6)
+   if (devinfo->gen == 6)
        brw->urb.gs_present = false;
  
     brw->prim_restart.in_progress = false;
@@ -981,12 +1109,12 @@ brwCreateContext(gl_api api,
  
     brw_draw_init( brw );
  
-   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
+   if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
        /* Turn on some extra GL_ARB_debug_output generation. */
        brw->perf_debug = true;
     }
  
-   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
+   if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
        ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
        ctx->Const.RobustAccess = GL_TRUE;
     }
@@ -994,8 +1122,21 @@ brwCreateContext(gl_api api,
     if (INTEL_DEBUG & DEBUG_SHADER_TIME)
        brw_init_shader_time(brw);
  
+   _mesa_override_extensions(ctx);
     _mesa_compute_version(ctx);
  
+   /* GL_ARB_gl_spirv */
+   if (ctx->Extensions.ARB_gl_spirv) {
+      brw_initialize_spirv_supported_capabilities(brw);
+
+      if (ctx->Extensions.ARB_spirv_extensions) {
+         /* GL_ARB_spirv_extensions */
+         ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
+         _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
+                                               &ctx->Const.SpirVCapabilities);
+      }
+   }
+
     _mesa_initialize_dispatch_tables(ctx);
     _mesa_initialize_vbo_vtxfmt(ctx);
  
@@ -1005,6 +1146,14 @@ brwCreateContext(gl_api api,
     vbo_use_buffer_objects(ctx);
     vbo_always_unmap_buffers(ctx);
  
+   brw->ctx.Cache = brw->screen->disk_cache;
+
+   if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
+       driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
+      /* Loader supports multithreading, and so do we. */
+      _mesa_glthread_init(ctx);
+   }
+
     return true;
  }
  
@@ -1015,6 +1164,18 @@ intelDestroyContext(__DRIcontext * driContextPriv)
        (struct brw_context *) driContextPriv->driverPrivate;
     struct gl_context *ctx = &brw->ctx;
  
+   GET_CURRENT_CONTEXT(curctx);
+
+   if (curctx == NULL) {
+      /* No current context, but we need one to release
+       * renderbuffer surface when we release framebuffer.
+       * So temporarily bind the context.
+       */
+      _mesa_make_current(ctx, NULL, NULL);
+   }
+
+   _mesa_glthread_destroy(&brw->ctx);
+
     _mesa_meta_free(&brw->ctx);
  
     if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
@@ -1025,23 +1186,24 @@ intelDestroyContext(__DRIcontext * driContextPriv)
        brw_destroy_shader_time(brw);
     }
  
-   if (brw->gen >= 6)
-      blorp_finish(&brw->blorp);
+   blorp_finish(&brw->blorp);
  
     brw_destroy_state(brw);
     brw_draw_destroy(brw);
  
     brw_bo_unreference(brw->curbe.curbe_bo);
-   if (brw->vs.base.scratch_bo)
-      brw_bo_unreference(brw->vs.base.scratch_bo);
-   if (brw->tcs.base.scratch_bo)
-      brw_bo_unreference(brw->tcs.base.scratch_bo);
-   if (brw->tes.base.scratch_bo)
-      brw_bo_unreference(brw->tes.base.scratch_bo);
-   if (brw->gs.base.scratch_bo)
-      brw_bo_unreference(brw->gs.base.scratch_bo);
-   if (brw->wm.base.scratch_bo)
-      brw_bo_unreference(brw->wm.base.scratch_bo);
+
+   brw_bo_unreference(brw->vs.base.scratch_bo);
+   brw_bo_unreference(brw->tcs.base.scratch_bo);
+   brw_bo_unreference(brw->tes.base.scratch_bo);
+   brw_bo_unreference(brw->gs.base.scratch_bo);
+   brw_bo_unreference(brw->wm.base.scratch_bo);
+
+   brw_bo_unreference(brw->vs.base.push_const_bo);
+   brw_bo_unreference(brw->tcs.base.push_const_bo);
+   brw_bo_unreference(brw->tes.base.push_const_bo);
+   brw_bo_unreference(brw->gs.base.push_const_bo);
+   brw_bo_unreference(brw->wm.base.push_const_bo);
  
     brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
  
@@ -1065,7 +1227,7 @@ intelDestroyContext(__DRIcontext * driContextPriv)
     driDestroyOptionCache(&brw->optionCache);
  
     /* free the Mesa context */
-   _mesa_free_context_data(&brw->ctx);
+   _mesa_free_context_data(&brw->ctx, true);
  
     ralloc_free(brw);
     driContextPriv->driverPrivate = NULL;
@@ -1074,6 +1236,9 @@ intelDestroyContext(__DRIcontext * driContextPriv)
  GLboolean
  intelUnbindContext(__DRIcontext * driContextPriv)
  {
+   struct gl_context *ctx = driContextPriv->driverPrivate;
+   _mesa_glthread_finish(ctx);
+
     /* Unset current context and dispath table */
     _mesa_make_current(NULL, NULL, NULL);
  
@@ -1105,8 +1270,8 @@ intelUnbindContext(__DRIcontext * driContextPriv)
   *
   * Unfortunately, renderbuffer setup happens before a context is created.  So
   * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
- * context (without an sRGB visual, though we don't have sRGB visuals exposed
- * yet), we go turn that back off before anyone finds out.
+ * context (without an sRGB visual), we go turn that back off before anyone
+ * finds out.
   */
  static void
  intel_gles3_srgb_workaround(struct brw_context *brw,
@@ -1117,15 +1282,19 @@ intel_gles3_srgb_workaround(struct brw_context *brw,
     if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
        return;
  
-   /* Some day when we support the sRGB capable bit on visuals available for
-    * GLES, we'll need to respect that and not disable things here.
-    */
-   fb->Visual.sRGBCapable = false;
     for (int i = 0; i < BUFFER_COUNT; i++) {
        struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
+
+      /* Check if sRGB was specifically asked for. */
+      struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
+      if (irb && irb->need_srgb)
+         return;
+
        if (rb)
           rb->Format = _mesa_get_srgb_format_linear(rb->Format);
     }
+   /* Disable sRGB from framebuffers that are not compatible. */
+   fb->Visual.sRGBCapable = false;
  }
  
  GLboolean
@@ -1134,21 +1303,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
                   __DRIdrawable * driReadPriv)
  {
     struct brw_context *brw;
-   GET_CURRENT_CONTEXT(curCtx);
  
     if (driContextPriv)
        brw = (struct brw_context *) driContextPriv->driverPrivate;
     else
        brw = NULL;
  
-   /* According to the glXMakeCurrent() man page: "Pending commands to
-    * the previous context, if any, are flushed before it is released."
-    * But only flush if we're actually changing contexts.
-    */
-   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
-      _mesa_flush(curCtx);
-   }
-
     if (driContextPriv) {
        struct gl_context *ctx = &brw->ctx;
        struct gl_framebuffer *fb, *readFb;
@@ -1182,6 +1342,8 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
  
        _mesa_make_current(ctx, fb, readFb);
     } else {
+      GET_CURRENT_CONTEXT(ctx);
+      _mesa_glthread_finish(ctx);
        _mesa_make_current(NULL, NULL, NULL);
     }
  
@@ -1192,7 +1354,9 @@ void
  intel_resolve_for_dri2_flush(struct brw_context *brw,
                               __DRIdrawable *drawable)
  {
-   if (brw->gen < 6) {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   if (devinfo->gen < 6) {
        /* MSAA and fast color clear are not supported, so don't waste time
         * checking whether a resolve is needed.
         */
@@ -1214,12 +1378,27 @@ intel_resolve_for_dri2_flush(struct brw_context *brw,
        rb = intel_get_renderbuffer(fb, buffers[i]);
        if (rb == NULL || rb->mt == NULL)
           continue;
-      if (rb->mt->num_samples <= 1) {
+      if (rb->mt->surf.samples == 1) {
           assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
                  rb->layer_count == 1);
-         intel_miptree_prepare_access(brw, rb->mt, 0, 1, 0, 1, false, false);
+         intel_miptree_prepare_external(brw, rb->mt);
        } else {
           intel_renderbuffer_downsample(brw, rb);
+
+         /* Call prepare_external on the single-sample miptree to do any
+          * needed resolves prior to handing it off to the window system.
+          * This is needed in the case that rb->singlesample_mt is Y-tiled
+          * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E.  In
+          * this case, the MSAA resolve above will write compressed data into
+          * rb->singlesample_mt.
+          *
+          * TODO: Some day, if we decide to care about the tiny performance
+          * hit we're taking by doing the MSAA resolve and then a CCS resolve,
+          * we could detect this case and just allocate the single-sampled
+          * miptree without aux.  However, that would be a lot of plumbing and
+          * this is a rather exotic case so it's not really worth it.
+          */
+         intel_miptree_prepare_external(brw, rb->singlesample_mt);
        }
     }
  }
@@ -1355,6 +1534,11 @@ intel_prepare_render(struct brw_context *brw)
      */
     if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
        brw->front_buffer_dirty = true;
+
+   if (brw->is_shared_buffer_bound) {
+      /* Subsequent rendering will probably dirty the shared buffer. */
+      brw->is_shared_buffer_dirty = true;
+   }
  }
  
  /**
@@ -1504,10 +1688,36 @@ intel_process_dri2_buffer(struct brw_context *brw,
        return;
     }
  
-   if (!intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
+   uint32_t tiling, swizzle;
+   brw_bo_get_tiling(bo, &tiling, &swizzle);
+
+   struct intel_mipmap_tree *mt =
+      intel_miptree_create_for_bo(brw,
+                                  bo,
+                                  intel_rb_format(rb),
+                                  0,
+                                  drawable->w,
+                                  drawable->h,
+                                  1,
+                                  buffer->pitch,
+                                  isl_tiling_from_i915_tiling(tiling),
+                                  MIPTREE_CREATE_DEFAULT);
+   if (!mt) {
+      brw_bo_unreference(bo);
+      return;
+   }
+
+   /* We got this BO from X11.  We can't assume that we have coherent texture
+    * access because X may suddenly decide to use it for scan-out which would
+    * destroy coherency.
+    */
+   bo->cache_coherent = false;
+
+   if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
                                                   drawable->w, drawable->h,
                                                   buffer->pitch)) {
        brw_bo_unreference(bo);
+      intel_miptree_release(&mt);
        return;
     }
  
@@ -1562,19 +1772,69 @@ intel_update_image_buffer(struct brw_context *intel,
     else
        last_mt = rb->singlesample_mt;
  
-   if (last_mt && last_mt->bo == buffer->bo)
+   if (last_mt && last_mt->bo == buffer->bo) {
+      if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
+         intel_miptree_make_shareable(intel, last_mt);
+      }
+      return;
+   }
+
+   /* Only allow internal compression if samples == 0.  For multisampled
+    * window system buffers, the only thing the single-sampled buffer is used
+    * for is as a resolve target.  If we do any compression beyond what is
+    * supported by the window system, we will just have to resolve so it's
+    * probably better to just not bother.
+    */
+   const bool allow_internal_aux = (num_samples == 0);
+
+   struct intel_mipmap_tree *mt =
+      intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
+                                         intel_rb_format(rb),
+                                         allow_internal_aux);
+   if (!mt)
        return;
  
-   if (!intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
+   if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
                                                   buffer->width, buffer->height,
-                                                 buffer->pitch))
+                                                 buffer->pitch)) {
+      intel_miptree_release(&mt);
        return;
+   }
  
     if (_mesa_is_front_buffer_drawing(fb) &&
         buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
         rb->Base.Base.NumSamples > 1) {
        intel_renderbuffer_upsample(intel, rb);
     }
+
+   if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
+      /* The compositor and the application may access this image
+       * concurrently. The display hardware may even scanout the image while
+       * the GPU is rendering to it.  Aux surfaces cause difficulty with
+       * concurrent access, so permanently disable aux for this miptree.
+       *
+       * Perhaps we could improve overall application performance by
+       * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
+       * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
+       * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
+       * approach to be highly dependent on the application's GL usage.
+       *
+       * I [chadv] expect clever disabling/reenabling to be counterproductive
+       * in the use cases I care about: applications that render nearly
+       * realtime handwriting to the surface while possibly undergoing
+       * simultaneous scanout as a display plane. The app requires low
+       * render latency. Even though the app spends most of its time in
+       * shared-buffer mode, it also frequently transitions between
+       * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
+       * mode.  Visual stutter during the transitions should be avoided.
+       *
+       * In this case, I [chadv] believe reducing the GPU workload at
+       * shared-buffer/double-buffer transitions would offer a smoother app
+       * experience than any savings due to aux compression. But I've
+       * collected no data to prove my theory.
+       */
+      intel_miptree_make_shareable(intel, mt);
+   }
  }
  
  static void
@@ -1635,4 +1895,19 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
                                  images.back,
                                  __DRI_IMAGE_BUFFER_BACK);
     }
+
+   if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
+      assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
+      drawable->w = images.back->width;
+      drawable->h = images.back->height;
+      intel_update_image_buffer(brw,
+                                drawable,
+                                back_rb,
+                                images.back,
+                                __DRI_IMAGE_BUFFER_SHARED);
+      brw->is_shared_buffer_bound = true;
+   } else {
+      brw->is_shared_buffer_bound = false;
+      brw->is_shared_buffer_dirty = false;
+   }
  }