i965: Make INTEL_DEBUG=bat decode VS/CLIP/GS/SF/WM/CC_STATE on Gen4-5.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
index 3af45551e991ec567bef565a26a51fd2a905ae4b..1247d0355f688fcd331e3ac03e88f0365a574c49 100644 (file)
@@ -31,6 +31,7 @@
   */
 
 
+#include "compiler/nir/nir.h"
 #include "main/api_exec.h"
 #include "main/context.h"
 #include "main/fbobject.h"
@@ -52,7 +53,6 @@
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_blorp.h"
-#include "brw_compiler.h"
 #include "brw_draw.h"
 #include "brw_state.h"
 
@@ -80,9 +80,9 @@
 const char *const brw_vendor_string = "Intel Open Source Technology Center";
 
 static const char *
-get_bsw_model(const struct intel_screen *intelScreen)
+get_bsw_model(const struct intel_screen *screen)
 {
-   switch (intelScreen->eu_total) {
+   switch (screen->eu_total) {
    case 16:
       return "405";
    case 12:
@@ -93,13 +93,13 @@ get_bsw_model(const struct intel_screen *intelScreen)
 }
 
 const char *
-brw_get_renderer_string(const struct intel_screen *intelScreen)
+brw_get_renderer_string(const struct intel_screen *screen)
 {
    const char *chipset;
    static char buffer[128];
    char *bsw = NULL;
 
-   switch (intelScreen->deviceID) {
+   switch (screen->deviceID) {
 #undef CHIPSET
 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
 #include "pci_ids/i965_pci_ids.h"
@@ -109,11 +109,11 @@ brw_get_renderer_string(const struct intel_screen *intelScreen)
    }
 
    /* Braswell branding is funny, so we have to fix it up here */
-   if (intelScreen->deviceID == 0x22B1) {
+   if (screen->deviceID == 0x22B1) {
       bsw = strdup(chipset);
       char *needle = strstr(bsw, "XXX");
       if (needle) {
-         memcpy(needle, get_bsw_model(intelScreen), 3);
+         memcpy(needle, get_bsw_model(screen), 3);
          chipset = bsw;
       }
    }
@@ -134,7 +134,7 @@ intel_get_string(struct gl_context * ctx, GLenum name)
 
    case GL_RENDERER:
       return
-         (GLubyte *) brw_get_renderer_string(brw->intelScreen);
+         (GLubyte *) brw_get_renderer_string(brw->screen);
 
    default:
       return NULL;
@@ -164,12 +164,12 @@ intel_update_framebuffer(struct gl_context *ctx,
    /* Quantize the derived default number of samples
     */
    fb->DefaultGeometry._NumSamples =
-      intel_quantize_num_samples(brw->intelScreen,
+      intel_quantize_num_samples(brw->screen,
                                  fb->DefaultGeometry.NumSamples);
 }
 
 static bool
-intel_disable_rb_aux_buffer(struct brw_context *brw, const drm_intel_bo *bo)
+intel_disable_rb_aux_buffer(struct brw_context *brw, const struct brw_bo *bo)
 {
    const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
    bool found = false;
@@ -205,10 +205,9 @@ intel_texture_view_requires_resolve(struct brw_context *brw,
        !intel_miptree_is_lossless_compressed(brw, intel_tex->mt))
      return false;
 
-   const uint32_t brw_format = brw_format_for_mesa_format(intel_tex->_Format);
+   const uint32_t brw_format = brw_isl_format_for_mesa_format(intel_tex->_Format);
 
-   if (isl_format_supports_lossless_compression(brw->intelScreen->devinfo,
-                                                brw_format))
+   if (isl_format_supports_ccs_e(&brw->screen->devinfo, brw_format))
       return false;
 
    perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
@@ -253,13 +252,16 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
       if (!tex_obj || !tex_obj->mt)
         continue;
-      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
+      if (intel_miptree_sample_with_hiz(brw, tex_obj->mt))
+         intel_miptree_all_slices_resolve_hiz(brw, tex_obj->mt);
+      else
+         intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
       /* Sampling engine understands lossless compression and resolving
        * those surfaces should be skipped for performance reasons.
        */
       const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ?
                            0 : INTEL_MIPTREE_IGNORE_CCS_E;
-      intel_miptree_resolve_color(brw, tex_obj->mt, flags);
+      intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, flags);
       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 
       if (tex_obj->base.StencilSampling ||
@@ -270,13 +272,12 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
 
    /* Resolve color for each active shader image. */
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-      const struct gl_linked_shader *shader =
-         ctx->_Shader->CurrentProgram[i] ?
-            ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;
+      const struct gl_program *prog = ctx->_Shader->CurrentProgram[i];
 
-      if (unlikely(shader && shader->NumImages)) {
-         for (unsigned j = 0; j < shader->NumImages; j++) {
-            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
+      if (unlikely(prog && prog->info.num_images)) {
+         for (unsigned j = 0; j < prog->info.num_images; j++) {
+            struct gl_image_unit *u =
+               &ctx->ImageUnits[prog->sh.ImageUnits[j]];
             tex_obj = intel_texture_object(u->TexObj);
 
             if (tex_obj && tex_obj->mt) {
@@ -287,7 +288,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
                 * compressed surfaces need to be resolved prior to accessing
                 * them. Hence skip setting INTEL_MIPTREE_IGNORE_CCS_E.
                 */
-               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
+               intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, 0);
 
                if (intel_miptree_is_lossless_compressed(brw, tex_obj->mt) &&
                    intel_disable_rb_aux_buffer(brw, tex_obj->mt->bo)) {
@@ -304,7 +305,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
    /* Resolve color buffers for non-coherent framebuffer fetch. */
    if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
        ctx->FragmentProgram._Current &&
-       ctx->FragmentProgram._Current->Base.OutputsRead) {
+       ctx->FragmentProgram._Current->info.outputs_read) {
       const struct gl_framebuffer *fb = ctx->DrawBuffer;
 
       for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
@@ -312,8 +313,9 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
             intel_renderbuffer(fb->_ColorDrawBuffers[i]);
 
          if (irb &&
-             intel_miptree_resolve_color(brw, irb->mt,
-                                         INTEL_MIPTREE_IGNORE_CCS_E))
+             intel_miptree_resolve_color(
+                brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count,
+                INTEL_MIPTREE_IGNORE_CCS_E))
             brw_render_cache_set_check_flush(brw, irb->mt->bo);
       }
    }
@@ -344,7 +346,7 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
           * should be impossible to get here with such surfaces.
           */
          assert(!intel_miptree_is_lossless_compressed(brw, mt));
-         intel_miptree_resolve_color(brw, mt, 0);
+         intel_miptree_all_slices_resolve_color(brw, mt, 0);
          brw_render_cache_set_check_flush(brw, mt->bo);
       }
    }
@@ -366,10 +368,10 @@ intel_flush_front(struct gl_context *ctx)
    struct brw_context *brw = brw_context(ctx);
    __DRIcontext *driContext = brw->driContext;
    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
-   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
+   __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
 
    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
-      if (flushFront(screen) && driDrawable &&
+      if (flushFront(dri_screen) && driDrawable &&
           driDrawable->loaderPrivate) {
 
          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
@@ -382,7 +384,7 @@ intel_flush_front(struct gl_context *ctx)
          intel_resolve_for_dri2_flush(brw, driDrawable);
          intel_batchbuffer_flush(brw);
 
-         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
+         flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
 
          /* We set the dirty bit in intel_prepare_render() if we're
           * front buffer rendering once we get there.
@@ -411,7 +413,7 @@ intel_finish(struct gl_context * ctx)
    intel_glFlush(ctx);
 
    if (brw->batch.last_bo)
-      drm_intel_bo_wait_rendering(brw->batch.last_bo);
+      brw_bo_wait_rendering(brw, brw->batch.last_bo);
 }
 
 static void
@@ -444,7 +446,7 @@ brw_init_driver_functions(struct brw_context *brw,
    intelInitBufferFuncs(functions);
    intelInitPixelFuncs(functions);
    intelInitBufferObjectFuncs(functions);
-   intel_init_syncobj_functions(functions);
+   brw_init_syncobj_functions(functions);
    brw_init_object_purgeable_functions(functions);
 
    brwInitFragProgFuncs( functions );
@@ -463,7 +465,7 @@ brw_init_driver_functions(struct brw_context *brw,
 
    functions->NewTransformFeedback = brw_new_transform_feedback;
    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
-   if (brw->intelScreen->has_mi_math_and_lrr) {
+   if (can_do_mi_math_and_lrr(brw->screen)) {
       functions->BeginTransformFeedback = hsw_begin_transform_feedback;
       functions->EndTransformFeedback = hsw_end_transform_feedback;
       functions->PauseTransformFeedback = hsw_pause_transform_feedback;
@@ -478,6 +480,10 @@ brw_init_driver_functions(struct brw_context *brw,
    } else {
       functions->BeginTransformFeedback = brw_begin_transform_feedback;
       functions->EndTransformFeedback = brw_end_transform_feedback;
+      functions->PauseTransformFeedback = brw_pause_transform_feedback;
+      functions->ResumeTransformFeedback = brw_resume_transform_feedback;
+      functions->GetTransformFeedbackVertexCount =
+         brw_get_transform_feedback_vertex_count;
    }
 
    if (brw->gen >= 6)
@@ -488,7 +494,7 @@ static void
 brw_initialize_context_constants(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
-   const struct brw_compiler *compiler = brw->intelScreen->compiler;
+   const struct brw_compiler *compiler = brw->screen->compiler;
 
    const bool stage_exists[MESA_SHADER_STAGES] = {
       [MESA_SHADER_VERTEX] = true,
@@ -497,7 +503,7 @@ brw_initialize_context_constants(struct brw_context *brw)
       [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
       [MESA_SHADER_FRAGMENT] = true,
       [MESA_SHADER_COMPUTE] =
-         (ctx->API == API_OPENGL_CORE &&
+         ((ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGL_CORE) &&
           ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
          (ctx->API == API_OPENGLES2 &&
           ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
@@ -518,23 +524,50 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.MaxCombinedShaderOutputResources =
       MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
 
+   /* The timestamp register we can read for glGetTimestamp() is
+    * sometimes only 32 bits, before scaling to nanoseconds (depending
+    * on kernel).
+    *
+    * Once scaled to nanoseconds the timestamp would roll over at a
+    * non-power-of-two, so an application couldn't use
+    * GL_QUERY_COUNTER_BITS to handle rollover correctly.  Instead, we
+    * report 36 bits and truncate at that (rolling over 5 times as
+    * often as the HW counter), and when the 32-bit counter rolls
+    * over, it happens to also be at a rollover in the reported value
+    * from near (1<<36) to 0.
+    *
+    * The low 32 bits rolls over in ~343 seconds.  Our 36-bit result
+    * rolls over every ~69 seconds.
+    */
    ctx->Const.QueryCounterBits.Timestamp = 36;
 
    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
    ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
-   ctx->Const.MaxRenderbufferSize = 8192;
-   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+   if (brw->gen >= 7) {
+      ctx->Const.MaxRenderbufferSize = 16384;
+      ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
+      ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
+   } else {
+      ctx->Const.MaxRenderbufferSize = 8192;
+      ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+      ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+   }
    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
-   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
    ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
    ctx->Const.MaxTextureMbytes = 1536;
    ctx->Const.MaxTextureRectSize = 1 << 12;
    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+   ctx->Const.MaxTextureLodBias = 15.0;
    ctx->Const.StripTextureBorder = true;
-   if (brw->gen >= 7)
+   if (brw->gen >= 7) {
       ctx->Const.MaxProgramTextureGatherComponents = 4;
-   else if (brw->gen == 6)
+      ctx->Const.MinProgramTextureGatherOffset = -32;
+      ctx->Const.MaxProgramTextureGatherOffset = 31;
+   } else if (brw->gen == 6) {
       ctx->Const.MaxProgramTextureGatherComponents = 1;
+      ctx->Const.MinProgramTextureGatherOffset = -8;
+      ctx->Const.MaxProgramTextureGatherOffset = 7;
+   }
 
    ctx->Const.MaxUniformBlockSize = 65536;
 
@@ -591,10 +624,10 @@ brw_initialize_context_constants(struct brw_context *brw)
       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 
    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
-      !brw->intelScreen->has_mi_math_and_lrr;
+      !can_do_mi_math_and_lrr(brw->screen);
 
    int max_samples;
-   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
+   const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
    const int clamp_max_samples =
       driQueryOptioni(&brw->optionCache, "clamp_max_samples");
 
@@ -655,7 +688,7 @@ brw_initialize_context_constants(struct brw_context *brw)
    if (brw->gen >= 5 || brw->is_g4x)
       ctx->Const.MaxClipPlanes = 8;
 
-   ctx->Const.LowerTessLevel = true;
+   ctx->Const.GLSLTessLevelsAsInputs = true;
    ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8;
    ctx->Const.LowerTESPatchVerticesIn = true;
    ctx->Const.PrimitiveRestartForPatches = true;
@@ -768,7 +801,7 @@ brw_initialize_context_constants(struct brw_context *brw)
    /* We want the GLSL compiler to emit code that uses condition codes */
    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
       ctx->Const.ShaderCompilerOptions[i] =
-         brw->intelScreen->compiler->glsl_compiler_options[i];
+         brw->screen->compiler->glsl_compiler_options[i];
    }
 
    if (brw->gen >= 7) {
@@ -776,8 +809,8 @@ brw_initialize_context_constants(struct brw_context *brw)
       ctx->Const.MaxViewportHeight = 32768;
    }
 
-   /* ARB_viewport_array */
-   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
+   /* ARB_viewport_array, OES_viewport_array */
+   if (brw->gen >= 6) {
       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
       ctx->Const.ViewportSubpixelBits = 0;
 
@@ -805,20 +838,18 @@ static void
 brw_initialize_cs_context_constants(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
-   const struct intel_screen *screen = brw->intelScreen;
-   const struct gen_device_info *devinfo = screen->devinfo;
+   const struct intel_screen *screen = brw->screen;
+   struct gen_device_info *devinfo = &brw->screen->devinfo;
 
    /* FINISHME: Do this for all platforms that the kernel supports */
    if (brw->is_cherryview &&
        screen->subslice_total > 0 && screen->eu_total > 0) {
       /* Logical CS threads = EUs per subslice * 7 threads per EU */
-      brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
+      uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
 
       /* Fuse configurations may give more threads than expected, never less. */
-      if (brw->max_cs_threads < devinfo->max_cs_threads)
-         brw->max_cs_threads = devinfo->max_cs_threads;
-   } else {
-      brw->max_cs_threads = devinfo->max_cs_threads;
+      if (max_cs_threads > devinfo->max_cs_threads)
+         devinfo->max_cs_threads = max_cs_threads;
    }
 
    /* Maximum number of scalar compute shader invocations that can be run in
@@ -830,7 +861,7 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
     * threads. With SIMD32 and 64 threads, Haswell still provides twice the
     * required the number of invocation needed for ARB_compute_shader.
     */
-   const unsigned max_threads = MIN2(64, brw->max_cs_threads);
+   const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
    const uint32_t max_invocations = 32 * max_threads;
    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
@@ -852,7 +883,7 @@ brw_process_driconf_options(struct brw_context *brw)
    struct gl_context *ctx = &brw->ctx;
 
    driOptionCache *options = &brw->optionCache;
-   driParseConfigFiles(options, &brw->intelScreen->optionCache,
+   driParseConfigFiles(options, &brw->screen->optionCache,
                        brw->driContext->driScreenPriv->myNum, "i965");
 
    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
@@ -860,11 +891,11 @@ brw_process_driconf_options(struct brw_context *brw)
    case DRI_CONF_BO_REUSE_DISABLED:
       break;
    case DRI_CONF_BO_REUSE_ALL:
-      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
+      brw_bufmgr_enable_reuse(brw->bufmgr);
       break;
    }
 
-   if (!driQueryOptionb(options, "hiz")) {
+   if (INTEL_DEBUG & DEBUG_NO_HIZ) {
        brw->has_hiz = false;
        /* On gen6, you can only do separate stencil with HIZ. */
        if (brw->gen == 6)
@@ -889,17 +920,26 @@ brw_process_driconf_options(struct brw_context *brw)
    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 
    if (driQueryOptionb(&brw->optionCache, "precise_trig"))
-      brw->intelScreen->compiler->precise_trig = true;
+      brw->screen->compiler->precise_trig = true;
 
    ctx->Const.ForceGLSLExtensionsWarn =
       driQueryOptionb(options, "force_glsl_extensions_warn");
 
+   ctx->Const.ForceGLSLVersion =
+      driQueryOptioni(options, "force_glsl_version");
+
    ctx->Const.DisableGLSLLineContinuations =
       driQueryOptionb(options, "disable_glsl_line_continuations");
 
    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 
+   ctx->Const.AllowHigherCompatVersion =
+      driQueryOptionb(options, "allow_higher_compat_version");
+
+   ctx->Const.ForceGLSLAbsSqrt =
+      driQueryOptionb(options, "force_glsl_abs_sqrt");
+
    ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
 
    brw->dual_color_blend_by_location =
@@ -917,10 +957,9 @@ brwCreateContext(gl_api api,
                  unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
 {
-   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
-   struct intel_screen *screen = sPriv->driverPrivate;
-   const struct gen_device_info *devinfo = screen->devinfo;
+   struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
+   const struct gen_device_info *devinfo = &screen->devinfo;
    struct dd_function_table functions;
 
    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
@@ -946,7 +985,7 @@ brwCreateContext(gl_api api,
 
    driContextPriv->driverPrivate = brw;
    brw->driContext = driContextPriv;
-   brw->intelScreen = screen;
+   brw->screen = screen;
    brw->bufmgr = screen->bufmgr;
 
    brw->gen = devinfo->gen;
@@ -1045,7 +1084,7 @@ brwCreateContext(gl_api api,
 
    intel_fbo_init(brw);
 
-   intel_batchbuffer_init(brw);
+   intel_batchbuffer_init(&brw->batch, brw->bufmgr, brw->has_llc);
 
    if (brw->gen >= 6) {
       /* Create a new hardware context.  Using a hardware context means that
@@ -1055,10 +1094,10 @@ brwCreateContext(gl_api api,
        * This is required for transform feedback buffer offsets, query objects,
        * and also allows us to reduce how much state we have to emit.
        */
-      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
+      brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
 
       if (!brw->hw_ctx) {
-         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
+         fprintf(stderr, "Failed to create hardware context.\n");
          intelDestroyContext(driContextPriv);
          return false;
       }
@@ -1079,17 +1118,7 @@ brwCreateContext(gl_api api,
    if (brw->gen >= 6)
       brw_blorp_init(brw);
 
-   brw->max_vs_threads = devinfo->max_vs_threads;
-   brw->max_hs_threads = devinfo->max_hs_threads;
-   brw->max_ds_threads = devinfo->max_ds_threads;
-   brw->max_gs_threads = devinfo->max_gs_threads;
-   brw->max_wm_threads = devinfo->max_wm_threads;
    brw->urb.size = devinfo->urb.size;
-   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
-   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
-   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
-   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
-   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 
    if (brw->gen == 6)
       brw->urb.gs_present = false;
@@ -1098,15 +1127,12 @@ brwCreateContext(gl_api api,
    brw->prim_restart.enable_cut_index = false;
    brw->gs.enabled = false;
    brw->sf.viewport_transform_enable = true;
+   brw->clip.viewport_count = 1;
 
    brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
 
    brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
 
-   brw->use_resource_streamer = screen->has_resource_streamer &&
-      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
-       env_var_as_boolean("INTEL_USE_GATHER", false));
-
    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 
@@ -1117,8 +1143,10 @@ brwCreateContext(gl_api api,
       brw->perf_debug = true;
    }
 
-   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
+   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
+      ctx->Const.RobustAccess = GL_TRUE;
+   }
 
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
       brw_init_shader_time(brw);
@@ -1128,9 +1156,8 @@ brwCreateContext(gl_api api,
    _mesa_initialize_dispatch_tables(ctx);
    _mesa_initialize_vbo_vtxfmt(ctx);
 
-   if (ctx->Extensions.AMD_performance_monitor) {
-      brw_init_performance_monitors(brw);
-   }
+   if (ctx->Extensions.INTEL_performance_query)
+      brw_init_performance_queries(brw);
 
    vbo_use_buffer_objects(ctx);
    vbo_always_unmap_buffers(ctx);
@@ -1145,12 +1172,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
       (struct brw_context *) driContextPriv->driverPrivate;
    struct gl_context *ctx = &brw->ctx;
 
-   /* Dump a final BMP in case the application doesn't call SwapBuffers */
-   if (INTEL_DEBUG & DEBUG_AUB) {
-      intel_batchbuffer_flush(brw);
-      aub_dump_bmp(&brw->ctx);
-   }
-
    _mesa_meta_free(&brw->ctx);
 
    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
@@ -1167,23 +1188,19 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    brw_destroy_state(brw);
    brw_draw_destroy(brw);
 
-   drm_intel_bo_unreference(brw->curbe.curbe_bo);
+   brw_bo_unreference(brw->curbe.curbe_bo);
    if (brw->vs.base.scratch_bo)
-      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
+      brw_bo_unreference(brw->vs.base.scratch_bo);
    if (brw->tcs.base.scratch_bo)
-      drm_intel_bo_unreference(brw->tcs.base.scratch_bo);
+      brw_bo_unreference(brw->tcs.base.scratch_bo);
    if (brw->tes.base.scratch_bo)
-      drm_intel_bo_unreference(brw->tes.base.scratch_bo);
+      brw_bo_unreference(brw->tes.base.scratch_bo);
    if (brw->gs.base.scratch_bo)
-      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
+      brw_bo_unreference(brw->gs.base.scratch_bo);
    if (brw->wm.base.scratch_bo)
-      drm_intel_bo_unreference(brw->wm.base.scratch_bo);
-
-   gen7_reset_hw_bt_pool_offsets(brw);
-   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
-   brw->hw_bt_pool.bo = NULL;
+      brw_bo_unreference(brw->wm.base.scratch_bo);
 
-   drm_intel_gem_context_destroy(brw->hw_ctx);
+   brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
 
    if (ctx->swrast_context) {
       _swsetup_DestroyContext(&brw->ctx);
@@ -1195,10 +1212,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
       _swrast_DestroyContext(&brw->ctx);
 
    brw_fini_pipe_control(brw);
-   intel_batchbuffer_free(brw);
+   intel_batchbuffer_free(&brw->batch);
 
-   drm_intel_bo_unreference(brw->throttle_batch[1]);
-   drm_intel_bo_unreference(brw->throttle_batch[0]);
+   brw_bo_unreference(brw->throttle_batch[1]);
+   brw_bo_unreference(brw->throttle_batch[0]);
    brw->throttle_batch[1] = NULL;
    brw->throttle_batch[0] = NULL;
 
@@ -1354,10 +1371,13 @@ intel_resolve_for_dri2_flush(struct brw_context *brw,
       rb = intel_get_renderbuffer(fb, buffers[i]);
       if (rb == NULL || rb->mt == NULL)
          continue;
-      if (rb->mt->num_samples <= 1)
-         intel_miptree_resolve_color(brw, rb->mt, 0);
-      else
+      if (rb->mt->num_samples <= 1) {
+         assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
+                rb->layer_count == 1);
+         intel_miptree_resolve_color(brw, rb->mt, 0, 0, 1, 0);
+      } else {
          intel_renderbuffer_downsample(brw, rb);
+      }
    }
 }
 
@@ -1443,7 +1463,7 @@ void
 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
 {
    struct brw_context *brw = context->driverPrivate;
-   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
 
    /* Set this up front, so that in case our buffers get invalidated
     * while we're getting new buffers, we don't clobber the stamp and
@@ -1453,7 +1473,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
 
-   if (screen->image.loader)
+   if (dri_screen->image.loader)
       intel_update_image_buffers(brw, drawable);
    else
       intel_update_dri2_buffers(brw, drawable);
@@ -1517,7 +1537,7 @@ intel_query_dri2_buffers(struct brw_context *brw,
                          __DRIbuffer **buffers,
                          int *buffer_count)
 {
-   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
    struct gl_framebuffer *fb = drawable->driverPrivate;
    int i = 0;
    unsigned attachments[8];
@@ -1561,12 +1581,13 @@ intel_query_dri2_buffers(struct brw_context *brw,
 
    assert(i <= ARRAY_SIZE(attachments));
 
-   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
-                                                        &drawable->w,
-                                                        &drawable->h,
-                                                        attachments, i / 2,
-                                                        buffer_count,
-                                                        drawable->loaderPrivate);
+   *buffers =
+      dri_screen->dri2.loader->getBuffersWithFormat(drawable,
+                                                    &drawable->w,
+                                                    &drawable->h,
+                                                    attachments, i / 2,
+                                                    buffer_count,
+                                                    drawable->loaderPrivate);
 }
 
 /**
@@ -1579,7 +1600,7 @@ intel_query_dri2_buffers(struct brw_context *brw,
  *    DRI2BufferDepthStencil are handled as special cases.
  *
  * \param buffer_name is a human readable name, such as "dri2 front buffer",
- *        that is passed to drm_intel_bo_gem_create_from_name().
+ *        that is passed to brw_bo_gem_create_from_name().
  *
  * \see intel_update_renderbuffers()
  */
@@ -1591,7 +1612,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
                           const char *buffer_name)
 {
    struct gl_framebuffer *fb = drawable->driverPrivate;
-   drm_intel_bo *bo;
+   struct brw_bo *bo;
 
    if (!rb)
       return;
@@ -1612,10 +1633,10 @@ intel_process_dri2_buffer(struct brw_context *brw,
    if (last_mt) {
        /* The bo already has a name because the miptree was created by a
        * previous call to intel_process_dri2_buffer(). If a bo already has a
-       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
+       * name, then brw_bo_flink() is a low-cost getter.  It does not
        * create a new name.
        */
-      drm_intel_bo_flink(last_mt->bo, &old_name);
+      brw_bo_flink(last_mt->bo, &old_name);
    }
 
    if (old_name == buffer->name)
@@ -1628,7 +1649,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
               buffer->cpp, buffer->pitch);
    }
 
-   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
+   bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                           buffer->name);
    if (!bo) {
       fprintf(stderr,
@@ -1653,7 +1674,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
 
    assert(rb->mt);
 
-   drm_intel_bo_unreference(bo);
+   brw_bo_unreference(bo);
 }
 
 /**
@@ -1713,7 +1734,7 @@ static void
 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
 {
    struct gl_framebuffer *fb = drawable->driverPrivate;
-   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
    struct intel_renderbuffer *front_rb;
    struct intel_renderbuffer *back_rb;
    struct __DRIimageList images;
@@ -1739,12 +1760,12 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
    if (back_rb)
       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
 
-   ret = screen->image.loader->getBuffers(drawable,
-                                          driGLFormatToImageFormat(format),
-                                          &drawable->dri2.stamp,
-                                          drawable->loaderPrivate,
-                                          buffer_mask,
-                                          &images);
+   ret = dri_screen->image.loader->getBuffers(drawable,
+                                              driGLFormatToImageFormat(format),
+                                              &drawable->dri2.stamp,
+                                              drawable->loaderPrivate,
+                                              buffer_mask,
+                                              &images);
    if (!ret)
       return;