*/
+#include "compiler/nir/nir.h"
#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"
+#include "main/stencil.h"
+#include "main/state.h"
#include "vbo/vbo_context.h"
#include "brw_context.h"
#include "brw_defines.h"
-#include "brw_compiler.h"
+#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_state.h"
const char *const brw_vendor_string = "Intel Open Source Technology Center";
static const char *
-get_bsw_model(const struct intel_screen *intelScreen)
+get_bsw_model(const struct intel_screen *screen)
{
- switch (intelScreen->eu_total) {
+ switch (screen->eu_total) {
case 16:
return "405";
case 12:
}
const char *
-brw_get_renderer_string(const struct intel_screen *intelScreen)
+brw_get_renderer_string(const struct intel_screen *screen)
{
const char *chipset;
static char buffer[128];
char *bsw = NULL;
- switch (intelScreen->deviceID) {
+ switch (screen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
}
/* Braswell branding is funny, so we have to fix it up here */
- if (intelScreen->deviceID == 0x22B1) {
+ if (screen->deviceID == 0x22B1) {
bsw = strdup(chipset);
char *needle = strstr(bsw, "XXX");
if (needle) {
- memcpy(needle, get_bsw_model(intelScreen), 3);
+ memcpy(needle, get_bsw_model(screen), 3);
chipset = bsw;
}
}
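   /* Editor's example (illustrative, not from the patch): the fixup above
    * assumes the CHIPSET table spells the model as a literal three-byte
    * "XXX" placeholder, so a hypothetical entry
    *
    *    "Intel(R) HD Graphics XXX (Braswell)"      (eu_total == 16)
    *
    * is rewritten in place by the 3-byte memcpy to
    *
    *    "Intel(R) HD Graphics 405 (Braswell)"
    */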
case GL_RENDERER:
return
- (GLubyte *) brw_get_renderer_string(brw->intelScreen);
+ (GLubyte *) brw_get_renderer_string(brw->screen);
default:
return NULL;
/* Quantize the derived default number of samples
*/
fb->DefaultGeometry._NumSamples =
- intel_quantize_num_samples(brw->intelScreen,
+ intel_quantize_num_samples(brw->screen,
fb->DefaultGeometry.NumSamples);
}
-/* On Gen9 color buffers may be compressed by the hardware (lossless
- * compression). There are, however, format restrictions, and care needs to
- * be taken that the sampler engine is capable of re-interpreting a buffer
- * with a format different from the one it was originally written with.
- *
- * For example, SRGB formats are not compressible and the sampler engine isn't
- * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
- * color buffer needs to be resolved so that the sampling surface can be
- * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
- * set).
- */
-static bool
-intel_texture_view_requires_resolve(struct brw_context *brw,
- struct intel_texture_object *intel_tex)
-{
- if (brw->gen < 9 ||
- !intel_miptree_is_lossless_compressed(brw, intel_tex->mt))
- return false;
-
- const uint32_t brw_format = brw_format_for_mesa_format(intel_tex->_Format);
-
- if (isl_format_supports_lossless_compression(brw->intelScreen->devinfo,
- brw_format))
- return false;
-
- perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
- _mesa_get_format_name(intel_tex->_Format),
- _mesa_get_format_name(intel_tex->mt->format));
-
- return true;
-}
-
static void
-intel_update_state(struct gl_context * ctx, GLuint new_state)
+intel_update_state(struct gl_context * ctx)
{
+ GLuint new_state = ctx->NewState;
struct brw_context *brw = brw_context(ctx);
- struct intel_texture_object *tex_obj;
- struct intel_renderbuffer *depth_irb;
if (ctx->swrast_context)
_swrast_InvalidateState(ctx, new_state);
- _vbo_InvalidateState(ctx, new_state);
brw->NewGLState |= new_state;
- _mesa_unlock_context_textures(ctx);
-
- /* Resolve the depth buffer's HiZ buffer. */
- depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
- if (depth_irb)
- intel_renderbuffer_resolve_hiz(brw, depth_irb);
-
- /* Resolve depth buffer and render cache of each enabled texture. */
- int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
- for (int i = 0; i <= maxEnabledUnit; i++) {
- if (!ctx->Texture.Unit[i]._Current)
- continue;
- tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
- if (!tex_obj || !tex_obj->mt)
- continue;
- intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
- /* The sampling engine understands lossless compression, so resolving
- * those surfaces should be skipped for performance reasons.
- */
- const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ?
- 0 : INTEL_MIPTREE_IGNORE_CCS_E;
- intel_miptree_resolve_color(brw, tex_obj->mt, flags);
- brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
- }
+ if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
+ _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
- /* Resolve color for each active shader image. */
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- const struct gl_linked_shader *shader =
- ctx->_Shader->CurrentProgram[i] ?
- ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;
-
- if (unlikely(shader && shader->NumImages)) {
- for (unsigned j = 0; j < shader->NumImages; j++) {
- struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
- tex_obj = intel_texture_object(u->TexObj);
-
- if (tex_obj && tex_obj->mt) {
- /* Access to images is implemented using indirect messages
- * against data port. Normal render target write understands
- * lossless compression but unfortunately the typed/untyped
- * read/write interface doesn't. Therefore the compressed
- * surfaces need to be resolved prior to accessing them.
- */
- intel_miptree_resolve_color(brw, tex_obj->mt, 0);
- brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
- }
- }
- }
+ if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
+ brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
+ brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
+ brw->stencil_write_enabled =
+ _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
}
- /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
- * single-sampled color renderbuffers because the CCS buffer isn't
- * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
- * enabled because otherwise the surface state will be programmed with the
- * linear equivalent format anyway.
- */
- if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
- struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
-
- if (rb == NULL)
- continue;
-
- struct intel_renderbuffer *irb = intel_renderbuffer(rb);
- struct intel_mipmap_tree *mt = irb->mt;
-
- if (mt == NULL ||
- mt->num_samples > 1 ||
- _mesa_get_srgb_format_linear(mt->format) == mt->format)
- continue;
-
- /* Lossless compression is not supported for SRGB formats, it
- * should be impossible to get here with such surfaces.
- */
- assert(!intel_miptree_is_lossless_compressed(brw, mt));
- intel_miptree_resolve_color(brw, mt, 0);
- brw_render_cache_set_check_flush(brw, mt->bo);
- }
- }
+ if (new_state & _NEW_POLYGON)
+ brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
- _mesa_lock_context_textures(ctx);
+ intel_prepare_render(brw);
if (new_state & _NEW_BUFFERS) {
intel_update_framebuffer(ctx, ctx->DrawBuffer);
struct brw_context *brw = brw_context(ctx);
__DRIcontext *driContext = brw->driContext;
__DRIdrawable *driDrawable = driContext->driDrawablePriv;
- __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
+ __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
- if (flushFront(screen) && driDrawable &&
+ if (flushFront(dri_screen) && driDrawable &&
driDrawable->loaderPrivate) {
/* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
intel_resolve_for_dri2_flush(brw, driDrawable);
intel_batchbuffer_flush(brw);
- flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
+ flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
/* We set the dirty bit in intel_prepare_render() if we're
* front buffer rendering once we get there.
intel_glFlush(ctx);
if (brw->batch.last_bo)
- drm_intel_bo_wait_rendering(brw->batch.last_bo);
+ brw_bo_wait_rendering(brw->batch.last_bo);
}
static void
intelInitBufferFuncs(functions);
intelInitPixelFuncs(functions);
intelInitBufferObjectFuncs(functions);
- intel_init_syncobj_functions(functions);
+ brw_init_syncobj_functions(functions);
brw_init_object_purgeable_functions(functions);
brwInitFragProgFuncs( functions );
else
gen4_init_queryobj_functions(functions);
brw_init_compute_functions(functions);
- if (brw->gen >= 7)
- brw_init_conditional_render_functions(functions);
+ brw_init_conditional_render_functions(functions);
functions->QueryInternalFormat = brw_query_internal_format;
functions->NewTransformFeedback = brw_new_transform_feedback;
functions->DeleteTransformFeedback = brw_delete_transform_feedback;
- if (brw->intelScreen->has_mi_math_and_lrr) {
+ if (can_do_mi_math_and_lrr(brw->screen)) {
functions->BeginTransformFeedback = hsw_begin_transform_feedback;
functions->EndTransformFeedback = hsw_end_transform_feedback;
functions->PauseTransformFeedback = hsw_pause_transform_feedback;
} else {
functions->BeginTransformFeedback = brw_begin_transform_feedback;
functions->EndTransformFeedback = brw_end_transform_feedback;
+ functions->PauseTransformFeedback = brw_pause_transform_feedback;
+ functions->ResumeTransformFeedback = brw_resume_transform_feedback;
+ functions->GetTransformFeedbackVertexCount =
+ brw_get_transform_feedback_vertex_count;
}
if (brw->gen >= 6)
brw_initialize_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
- const struct brw_compiler *compiler = brw->intelScreen->compiler;
+ const struct brw_compiler *compiler = brw->screen->compiler;
const bool stage_exists[MESA_SHADER_STAGES] = {
[MESA_SHADER_VERTEX] = true,
[MESA_SHADER_GEOMETRY] = brw->gen >= 6,
[MESA_SHADER_FRAGMENT] = true,
[MESA_SHADER_COMPUTE] =
- (ctx->API == API_OPENGL_CORE &&
+ ((ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGL_CORE) &&
ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
(ctx->API == API_OPENGLES2 &&
ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
ctx->Const.MaxCombinedShaderOutputResources =
MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
+ /* The timestamp register we can read for glGetTimestamp() is
+ * sometimes only 32 bits, before scaling to nanoseconds (depending
+ * on the kernel).
+ *
+ * Once scaled to nanoseconds the timestamp would roll over at a
+ * non-power-of-two, so an application couldn't use
+ * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
+ * report 36 bits and truncate at that (rolling over 5 times as
+ * often as the HW counter), and when the 32-bit counter rolls
+ * over, it happens to also be at a rollover in the reported value
+ * from near (1<<36) to 0.
+ *
+ * The low 32 bits roll over in ~343 seconds. Our 36-bit result
+ * rolls over every ~69 seconds.
+ */
ctx->Const.QueryCounterBits.Timestamp = 36;
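   /* Editor's sketch (illustrative, not driver code): the reported value
    * behaves as `scaled_ns & ((UINT64_C(1) << 36) - 1)`. Assuming a
    * hypothetical ~80 ns timestamp period, the 32-bit HW counter wraps
    * after 2^32 * 80 ns ~= 343 s, while the truncated 36-bit nanosecond
    * value wraps at 2^36 ns ~= 68.7 s, matching the numbers above.
    */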
ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
- ctx->Const.MaxRenderbufferSize = 8192;
- ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
-
- /* On Sandy Bridge and prior, the "Render Target View Extent" field of
- * RENDER_SURFACE_STATE is only 9 bits so the largest 3-D texture we can do
- * a layered render into has a depth of 512. On Iron Lake and earlier, we
- * don't support layered rendering and we use manual offsetting to render
- * into the different layers so this doesn't matter. On Sandy Bridge,
- * however, we do support layered rendering so this is a problem.
- */
- ctx->Const.Max3DTextureLevels = brw->gen == 6 ? 10 /* 512 */ : 12; /* 2048 */
-
- ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+ if (brw->gen >= 7) {
+ ctx->Const.MaxRenderbufferSize = 16384;
+ ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
+ ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
+ } else {
+ ctx->Const.MaxRenderbufferSize = 8192;
+ ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+ ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+ }
+ ctx->Const.Max3DTextureLevels = 12; /* 2048 */
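+   /* Editor's note (worked numbers): MaxTextureLevels counts mip levels,
+    * so 15 levels cover a base size of 2^(15-1) = 16384 texels per side
+    * and 14 levels cover 8192, matching MaxRenderbufferSize above, while
+    * the fixed 12 levels for 3-D textures correspond to 2^11 = 2048.
+    */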
ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
ctx->Const.MaxTextureMbytes = 1536;
- ctx->Const.MaxTextureRectSize = 1 << 12;
+ ctx->Const.MaxTextureRectSize = brw->gen >= 7 ? 16384 : 8192;
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ ctx->Const.MaxTextureLodBias = 15.0;
ctx->Const.StripTextureBorder = true;
- if (brw->gen >= 7)
+ if (brw->gen >= 7) {
ctx->Const.MaxProgramTextureGatherComponents = 4;
- else if (brw->gen == 6)
+ ctx->Const.MinProgramTextureGatherOffset = -32;
+ ctx->Const.MaxProgramTextureGatherOffset = 31;
+ } else if (brw->gen == 6) {
ctx->Const.MaxProgramTextureGatherComponents = 1;
+ ctx->Const.MinProgramTextureGatherOffset = -8;
+ ctx->Const.MaxProgramTextureGatherOffset = 7;
+ }
ctx->Const.MaxUniformBlockSize = 65536;
BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
- !brw->intelScreen->has_mi_math_and_lrr;
+ !can_do_mi_math_and_lrr(brw->screen);
int max_samples;
- const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
+ const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
const int clamp_max_samples =
driQueryOptioni(&brw->optionCache, "clamp_max_samples");
if (brw->gen >= 5 || brw->is_g4x)
ctx->Const.MaxClipPlanes = 8;
- ctx->Const.LowerTessLevel = true;
+ ctx->Const.GLSLTessLevelsAsInputs = true;
ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8;
ctx->Const.LowerTESPatchVerticesIn = true;
ctx->Const.PrimitiveRestartForPatches = true;
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
ctx->Const.ShaderCompilerOptions[i] =
- brw->intelScreen->compiler->glsl_compiler_options[i];
+ brw->screen->compiler->glsl_compiler_options[i];
}
if (brw->gen >= 7) {
ctx->Const.MaxViewportHeight = 32768;
}
- /* ARB_viewport_array */
- if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
+ /* ARB_viewport_array, OES_viewport_array */
+ if (brw->gen >= 6) {
ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
ctx->Const.ViewportSubpixelBits = 0;
ctx->Const.MaxFramebufferHeight = 16384;
ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
ctx->Const.MaxFramebufferSamples = max_samples;
+
+ /* OES_primitive_bounding_box */
+ ctx->Const.NoPrimitiveBoundingBoxOutput = true;
}
static void
-brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
+brw_initialize_cs_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
+ const struct intel_screen *screen = brw->screen;
+ struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+ /* FINISHME: Do this for all platforms that the kernel supports */
+ if (brw->is_cherryview &&
+ screen->subslice_total > 0 && screen->eu_total > 0) {
+ /* Logical CS threads = EUs per subslice * 7 threads per EU */
+ uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
+
+ /* Fuse configurations may give more threads than expected, never less. */
+ if (max_cs_threads > devinfo->max_cs_threads)
+ devinfo->max_cs_threads = max_cs_threads;
+ }
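+   /* Editor's example (hypothetical fuse values, for illustration only):
+    * eu_total == 48 over subslice_total == 6 gives 48 / 6 * 7 == 56
+    * logical threads, which only replaces devinfo->max_cs_threads if it
+    * exceeds the documented per-subslice maximum.
+    */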
+
/* Maximum number of scalar compute shader invocations that can be run in
* parallel in the same subslice assuming SIMD32 dispatch.
+ *
+ * We don't advertise more than 64 threads, because we are limited to 64 by
+ * our usage of thread_width_max in the gpgpu walker command. This only
+ * currently impacts Haswell, which otherwise might be able to advertise 70
+ * threads. With SIMD32 and 64 threads, Haswell still provides twice the
+ * number of invocations required by ARB_compute_shader.
*/
+ const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
const uint32_t max_invocations = 32 * max_threads;
ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
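   /* Editor's note (worked numbers): with the 64-thread cap above, SIMD32
    * dispatch gives 32 * 64 = 2048 invocations per work-group dimension,
    * twice the 1024 minimum that desktop GL requires for dimensions 0
    * and 1.
    */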
struct gl_context *ctx = &brw->ctx;
driOptionCache *options = &brw->optionCache;
- driParseConfigFiles(options, &brw->intelScreen->optionCache,
+ driParseConfigFiles(options, &brw->screen->optionCache,
brw->driContext->driScreenPriv->myNum, "i965");
int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
case DRI_CONF_BO_REUSE_DISABLED:
break;
case DRI_CONF_BO_REUSE_ALL:
- intel_bufmgr_gem_enable_reuse(brw->bufmgr);
+ brw_bufmgr_enable_reuse(brw->bufmgr);
break;
}
- if (!driQueryOptionb(options, "hiz")) {
+ if (INTEL_DEBUG & DEBUG_NO_HIZ) {
brw->has_hiz = false;
/* On gen6, you can only do separate stencil with HIZ. */
if (brw->gen == 6)
brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
if (driQueryOptionb(&brw->optionCache, "precise_trig"))
- brw->intelScreen->compiler->precise_trig = true;
+ brw->screen->compiler->precise_trig = true;
ctx->Const.ForceGLSLExtensionsWarn =
driQueryOptionb(options, "force_glsl_extensions_warn");
+ ctx->Const.ForceGLSLVersion =
+ driQueryOptioni(options, "force_glsl_version");
+
ctx->Const.DisableGLSLLineContinuations =
driQueryOptionb(options, "disable_glsl_line_continuations");
ctx->Const.AllowGLSLExtensionDirectiveMidShader =
driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
+ ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
+ driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
+
+ ctx->Const.AllowHigherCompatVersion =
+ driQueryOptionb(options, "allow_higher_compat_version");
+
+ ctx->Const.ForceGLSLAbsSqrt =
+ driQueryOptionb(options, "force_glsl_abs_sqrt");
+
+ ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
+
brw->dual_color_blend_by_location =
driQueryOptionb(options, "dual_color_blend_by_location");
}
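/* Editor's example (illustrative): the options parsed above come from the
 * DRI config mechanism, so a user could flip one per application with a
 * ~/.drirc entry along these lines (application/executable names are
 * hypothetical):
 *
 *    <driconf>
 *       <device driver="i965">
 *          <application name="Example App" executable="example">
 *             <option name="glsl_zero_init" value="true" />
 *          </application>
 *       </device>
 *    </driconf>
 */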
unsigned *dri_ctx_error,
void *sharedContextPrivate)
{
- __DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
- struct intel_screen *screen = sPriv->driverPrivate;
- const struct brw_device_info *devinfo = screen->devinfo;
+ struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
+ const struct gen_device_info *devinfo = &screen->devinfo;
struct dd_function_table functions;
/* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
driContextPriv->driverPrivate = brw;
brw->driContext = driContextPriv;
- brw->intelScreen = screen;
+ brw->screen = screen;
brw->bufmgr = screen->bufmgr;
brw->gen = devinfo->gen;
brw->is_baytrail = devinfo->is_baytrail;
brw->is_haswell = devinfo->is_haswell;
brw->is_cherryview = devinfo->is_cherryview;
- brw->is_broxton = devinfo->is_broxton;
+ brw->is_broxton = devinfo->is_broxton || devinfo->is_geminilake;
brw->has_llc = devinfo->has_llc;
brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
brw->has_swizzling = screen->hw_has_swizzling;
+ isl_device_init(&brw->isl_dev, devinfo, screen->hw_has_swizzling);
+
brw->vs.base.stage = MESA_SHADER_VERTEX;
brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
if (INTEL_DEBUG & DEBUG_PERF)
brw->perf_debug = true;
- brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
+ brw_initialize_cs_context_constants(brw);
brw_initialize_context_constants(brw);
ctx->Const.ResetStrategy = notify_reset
intel_fbo_init(brw);
- intel_batchbuffer_init(brw);
+ intel_batchbuffer_init(&brw->batch, brw->bufmgr, brw->has_llc);
if (brw->gen >= 6) {
/* Create a new hardware context. Using a hardware context means that
* This is required for transform feedback buffer offsets, query objects,
* and also allows us to reduce how much state we have to emit.
*/
- brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
+ brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
if (!brw->hw_ctx) {
- fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
+ fprintf(stderr, "Failed to create hardware context.\n");
intelDestroyContext(driContextPriv);
return false;
}
brw_init_surface_formats(brw);
- brw->max_vs_threads = devinfo->max_vs_threads;
- brw->max_hs_threads = devinfo->max_hs_threads;
- brw->max_ds_threads = devinfo->max_ds_threads;
- brw->max_gs_threads = devinfo->max_gs_threads;
- brw->max_wm_threads = devinfo->max_wm_threads;
- /* FINISHME: Do this for all platforms that the kernel supports */
- if (brw->is_cherryview &&
- screen->subslice_total > 0 && screen->eu_total > 0) {
- /* Logical CS threads = EUs per subslice * 7 threads per EU */
- brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
+ brw_blorp_init(brw);
- /* Fuse configurations may give more threads than expected, never less. */
- if (brw->max_cs_threads < devinfo->max_cs_threads)
- brw->max_cs_threads = devinfo->max_cs_threads;
- } else {
- brw->max_cs_threads = devinfo->max_cs_threads;
- }
brw->urb.size = devinfo->urb.size;
- brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
- brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
- brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
- brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
- brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
-
- /* Estimate the size of the mappable aperture into the GTT. There's an
- * ioctl to get the whole GTT size, but not one to get the mappable subset.
- * It turns out it's basically always 256MB, though some ancient hardware
- * was smaller.
- */
- uint32_t gtt_size = 256 * 1024 * 1024;
-
- /* We don't want to map two objects such that a memcpy between them would
- * just fault one mapping in and then the other over and over forever. So
- * we would need to divide the GTT size by 2. Additionally, some GTT is
- * taken up by things like the framebuffer and the ringbuffer and such, so
- * be more conservative.
- */
- brw->max_gtt_map_object_size = gtt_size / 4;
if (brw->gen == 6)
brw->urb.gs_present = false;
brw->prim_restart.in_progress = false;
brw->prim_restart.enable_cut_index = false;
brw->gs.enabled = false;
- brw->sf.viewport_transform_enable = true;
+ brw->clip.viewport_count = 1;
brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
- brw->use_resource_streamer = screen->has_resource_streamer &&
- (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
- env_var_as_boolean("INTEL_USE_GATHER", false));
+ brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
ctx->VertexProgram._MaintainTnlProgram = true;
ctx->FragmentProgram._MaintainTexEnvProgram = true;
brw->perf_debug = true;
}
- if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
+ if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
+ ctx->Const.RobustAccess = GL_TRUE;
+ }
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
brw_init_shader_time(brw);
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
- if (ctx->Extensions.AMD_performance_monitor) {
- brw_init_performance_monitors(brw);
- }
+ if (ctx->Extensions.INTEL_performance_query)
+ brw_init_performance_queries(brw);
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
(struct brw_context *) driContextPriv->driverPrivate;
struct gl_context *ctx = &brw->ctx;
- /* Dump a final BMP in case the application doesn't call SwapBuffers */
- if (INTEL_DEBUG & DEBUG_AUB) {
- intel_batchbuffer_flush(brw);
- aub_dump_bmp(&brw->ctx);
- }
-
_mesa_meta_free(&brw->ctx);
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
brw_destroy_shader_time(brw);
}
+ if (brw->gen >= 6)
+ blorp_finish(&brw->blorp);
+
brw_destroy_state(brw);
brw_draw_destroy(brw);
- drm_intel_bo_unreference(brw->curbe.curbe_bo);
+ brw_bo_unreference(brw->curbe.curbe_bo);
if (brw->vs.base.scratch_bo)
- drm_intel_bo_unreference(brw->vs.base.scratch_bo);
+ brw_bo_unreference(brw->vs.base.scratch_bo);
if (brw->tcs.base.scratch_bo)
- drm_intel_bo_unreference(brw->tcs.base.scratch_bo);
+ brw_bo_unreference(brw->tcs.base.scratch_bo);
if (brw->tes.base.scratch_bo)
- drm_intel_bo_unreference(brw->tes.base.scratch_bo);
+ brw_bo_unreference(brw->tes.base.scratch_bo);
if (brw->gs.base.scratch_bo)
- drm_intel_bo_unreference(brw->gs.base.scratch_bo);
+ brw_bo_unreference(brw->gs.base.scratch_bo);
if (brw->wm.base.scratch_bo)
- drm_intel_bo_unreference(brw->wm.base.scratch_bo);
-
- gen7_reset_hw_bt_pool_offsets(brw);
- drm_intel_bo_unreference(brw->hw_bt_pool.bo);
- brw->hw_bt_pool.bo = NULL;
+ brw_bo_unreference(brw->wm.base.scratch_bo);
- drm_intel_gem_context_destroy(brw->hw_ctx);
+ brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
if (ctx->swrast_context) {
_swsetup_DestroyContext(&brw->ctx);
_swrast_DestroyContext(&brw->ctx);
brw_fini_pipe_control(brw);
- intel_batchbuffer_free(brw);
+ intel_batchbuffer_free(&brw->batch);
- drm_intel_bo_unreference(brw->throttle_batch[1]);
- drm_intel_bo_unreference(brw->throttle_batch[0]);
+ brw_bo_unreference(brw->throttle_batch[1]);
+ brw_bo_unreference(brw->throttle_batch[0]);
brw->throttle_batch[1] = NULL;
brw->throttle_batch[0] = NULL;
rb = intel_get_renderbuffer(fb, buffers[i]);
if (rb == NULL || rb->mt == NULL)
continue;
- if (rb->mt->num_samples <= 1)
- intel_miptree_resolve_color(brw, rb->mt, 0);
- else
+ if (rb->mt->num_samples <= 1) {
+ assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
+ rb->layer_count == 1);
+ intel_miptree_prepare_access(brw, rb->mt, 0, 1, 0, 1, false, false);
+ } else {
intel_renderbuffer_downsample(brw, rb);
+ }
}
}
struct gl_framebuffer *fb = drawable->driverPrivate;
struct intel_renderbuffer *rb;
__DRIbuffer *buffers = NULL;
- int i, count;
+ int count;
const char *region_name;
/* Set this up front, so that in case our buffers get invalidated
if (buffers == NULL)
return;
- for (i = 0; i < count; i++) {
+ for (int i = 0; i < count; i++) {
switch (buffers[i].attachment) {
case __DRI_BUFFER_FRONT_LEFT:
rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
struct brw_context *brw = context->driverPrivate;
- __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+ __DRIscreen *dri_screen = brw->screen->driScrnPriv;
/* Set this up front, so that in case our buffers get invalidated
* while we're getting new buffers, we don't clobber the stamp and
if (unlikely(INTEL_DEBUG & DEBUG_DRI))
fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
- if (screen->image.loader)
+ if (dri_screen->image.loader)
intel_update_image_buffers(brw, drawable);
else
intel_update_dri2_buffers(brw, drawable);
__DRIbuffer **buffers,
int *buffer_count)
{
- __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+ __DRIscreen *dri_screen = brw->screen->driScrnPriv;
struct gl_framebuffer *fb = drawable->driverPrivate;
int i = 0;
unsigned attachments[8];
assert(i <= ARRAY_SIZE(attachments));
- *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
- &drawable->w,
- &drawable->h,
- attachments, i / 2,
- buffer_count,
- drawable->loaderPrivate);
+ *buffers =
+ dri_screen->dri2.loader->getBuffersWithFormat(drawable,
+ &drawable->w,
+ &drawable->h,
+ attachments, i / 2,
+ buffer_count,
+ drawable->loaderPrivate);
}
/**
* DRI2BufferDepthStencil are handled as special cases.
*
* \param buffer_name is a human readable name, such as "dri2 front buffer",
- * that is passed to drm_intel_bo_gem_create_from_name().
+ * that is passed to brw_bo_gem_create_from_name().
*
* \see intel_update_renderbuffers()
*/
const char *buffer_name)
{
struct gl_framebuffer *fb = drawable->driverPrivate;
- drm_intel_bo *bo;
+ struct brw_bo *bo;
if (!rb)
return;
if (last_mt) {
/* The bo already has a name because the miptree was created by a
* previous call to intel_process_dri2_buffer(). If a bo already has a
- * name, then drm_intel_bo_flink() is a low-cost getter. It does not
+ * name, then brw_bo_flink() is a low-cost getter. It does not
* create a new name.
*/
- drm_intel_bo_flink(last_mt->bo, &old_name);
+ brw_bo_flink(last_mt->bo, &old_name);
}
if (old_name == buffer->name)
buffer->cpp, buffer->pitch);
}
- bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
+ bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
buffer->name);
if (!bo) {
fprintf(stderr,
return;
}
- intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
- drawable->w, drawable->h,
- buffer->pitch);
+ struct intel_mipmap_tree *mt =
+ intel_miptree_create_for_bo(brw,
+ bo,
+ intel_rb_format(rb),
+ 0,
+ drawable->w,
+ drawable->h,
+ 1,
+ buffer->pitch,
+ MIPTREE_LAYOUT_FOR_SCANOUT);
+ if (!mt) {
+ brw_bo_unreference(bo);
+ return;
+ }
+
+ if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
+ drawable->w, drawable->h,
+ buffer->pitch)) {
+ brw_bo_unreference(bo);
+ intel_miptree_release(&mt);
+ return;
+ }
if (_mesa_is_front_buffer_drawing(fb) &&
(buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
assert(rb->mt);
- drm_intel_bo_unreference(bo);
+ brw_bo_unreference(bo);
}
/**
if (last_mt && last_mt->bo == buffer->bo)
return;
- intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
- buffer->width, buffer->height,
- buffer->pitch);
+ enum isl_colorspace colorspace;
+ switch (_mesa_get_format_color_encoding(intel_rb_format(rb))) {
+ case GL_SRGB:
+ colorspace = ISL_COLORSPACE_SRGB;
+ break;
+ case GL_LINEAR:
+ colorspace = ISL_COLORSPACE_LINEAR;
+ break;
+ default:
+ unreachable("Invalid color encoding");
+ }
+
+ struct intel_mipmap_tree *mt =
+ intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
+ colorspace, true);
+ if (!mt)
+ return;
+
+ if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
+ buffer->width, buffer->height,
+ buffer->pitch)) {
+ intel_miptree_release(&mt);
+ return;
+ }
if (_mesa_is_front_buffer_drawing(fb) &&
buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
struct gl_framebuffer *fb = drawable->driverPrivate;
- __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+ __DRIscreen *dri_screen = brw->screen->driScrnPriv;
struct intel_renderbuffer *front_rb;
struct intel_renderbuffer *back_rb;
struct __DRIimageList images;
- unsigned int format;
+ mesa_format format;
uint32_t buffer_mask = 0;
int ret;
if (back_rb)
buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
- ret = screen->image.loader->getBuffers(drawable,
- driGLFormatToImageFormat(format),
- &drawable->dri2.stamp,
- drawable->loaderPrivate,
- buffer_mask,
- &images);
+ ret = dri_screen->image.loader->getBuffers(drawable,
+ driGLFormatToImageFormat(format),
+ &drawable->dri2.stamp,
+ drawable->loaderPrivate,
+ buffer_mask,
+ &images);
if (!ret)
return;
images.front,
__DRI_IMAGE_BUFFER_FRONT);
}
+
if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
drawable->w = images.back->width;
drawable->h = images.back->height;