X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.c;h=7c1c13300dc6d5811ace186a05b3174b71978ae4;hb=094877f9d23169b1d209fb0c97f9b6d4679842d9;hp=3ce9ff6f187df0f21c3e528f51fa6121d9b9e86b;hpb=e3a9ca4563790f54976a969bf70cd5f45cbc4e13;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 3ce9ff6f187..7c1c13300dc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -34,11 +34,13 @@
 #include "main/api_exec.h"
 #include "main/context.h"
 #include "main/fbobject.h"
+#include "main/extensions.h"
 #include "main/imports.h"
 #include "main/macros.h"
 #include "main/points.h"
 #include "main/version.h"
 #include "main/vtxfmt.h"
+#include "main/texobj.h"
 
 #include "vbo/vbo_context.h"
 
@@ -48,6 +50,7 @@
 
 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_shader.h"
 #include "brw_draw.h"
 #include "brw_state.h"
 
@@ -57,14 +60,14 @@
 #include "intel_fbo.h"
 #include "intel_mipmap_tree.h"
 #include "intel_pixel.h"
-#include "intel_regions.h"
+#include "intel_image.h"
 #include "intel_tex.h"
 #include "intel_tex_obj.h"
 
 #include "swrast_setup/swrast_setup.h"
 #include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
-#include "glsl/ralloc.h"
+#include "util/ralloc.h"
 
 /***************************************
  * Mesa's Driver Functions
@@ -79,6 +82,7 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
    (void) target;
 
    switch (brw->gen) {
+   case 9:
    case 8:
       samples[0] = 8;
       samples[1] = 4;
@@ -95,6 +99,7 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
       return 1;
 
    default:
+      assert(brw->gen < 6);
       samples[0] = 1;
       return 1;
    }
@@ -122,7 +127,7 @@ brw_get_renderer_string(unsigned deviceID)
 }
 
 static const GLubyte *
-intelGetString(struct gl_context * ctx, GLenum name)
+intel_get_string(struct gl_context * ctx, GLenum name)
 {
    const struct brw_context *const brw = brw_context(ctx);
 
@@ -152,15 +157,39 @@ intel_viewport(struct gl_context *ctx)
 }
 
 static void
-intelInvalidateState(struct gl_context * ctx, GLuint new_state)
+intel_update_state(struct gl_context * ctx, GLuint new_state)
 {
    struct brw_context *brw = brw_context(ctx);
+   struct intel_texture_object *tex_obj;
+   struct intel_renderbuffer *depth_irb;
 
    if (ctx->swrast_context)
       _swrast_InvalidateState(ctx, new_state);
    _vbo_InvalidateState(ctx, new_state);
 
    brw->NewGLState |= new_state;
+
+   _mesa_unlock_context_textures(ctx);
+
+   /* Resolve the depth buffer's HiZ buffer. */
+   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
+   if (depth_irb)
+      intel_renderbuffer_resolve_hiz(brw, depth_irb);
+
+   /* Resolve depth buffer and render cache of each enabled texture. */
+   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+   for (int i = 0; i <= maxEnabledUnit; i++) {
+      if (!ctx->Texture.Unit[i]._Current)
+	 continue;
+      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+      if (!tex_obj || !tex_obj->mt)
+	 continue;
+      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
+      intel_miptree_resolve_color(brw, tex_obj->mt);
+      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+   }
+
+   _mesa_lock_context_textures(ctx);
 }
 
 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
@@ -204,12 +233,12 @@ intel_glFlush(struct gl_context *ctx)
 
    intel_batchbuffer_flush(brw);
    intel_flush_front(ctx);
-   if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
-      brw->need_throttle = true;
+
+   brw->need_flush_throttle = true;
 }
 
-void
-intelFinish(struct gl_context * ctx)
+static void
+intel_finish(struct gl_context * ctx)
 {
    struct brw_context *brw = brw_context(ctx);
 
@@ -236,14 +265,15 @@ brw_init_driver_functions(struct brw_context *brw,
       functions->Viewport = intel_viewport;
 
    functions->Flush = intel_glFlush;
-   functions->Finish = intelFinish;
-   functions->GetString = intelGetString;
-   functions->UpdateState = intelInvalidateState;
+   functions->Finish = intel_finish;
+   functions->GetString = intel_get_string;
+   functions->UpdateState = intel_update_state;
 
    intelInitTextureFuncs(functions);
    intelInitTextureImageFuncs(functions);
    intelInitTextureSubImageFuncs(functions);
    intelInitTextureCopyImageFuncs(functions);
+   intelInitCopyImageFuncs(functions);
    intelInitClearFuncs(functions);
    intelInitBufferFuncs(functions);
    intelInitPixelFuncs(functions);
@@ -257,6 +287,9 @@ brw_init_driver_functions(struct brw_context *brw,
       gen6_init_queryobj_functions(functions);
    else
       gen4_init_queryobj_functions(functions);
+   brw_init_compute_functions(functions);
+   if (brw->gen >= 7)
+      brw_init_conditional_render_functions(functions);
 
    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 
@@ -290,6 +323,15 @@ brw_initialize_context_constants(struct brw_context *brw)
 
    ctx->Const.StripTextureBorder = true;
 
+   ctx->Const.MaxUniformBlockSize = 65536;
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_program_constants *prog = &ctx->Const.Program[i];
+      prog->MaxUniformBlocks = 12;
+      prog->MaxCombinedUniformComponents =
+         prog->MaxUniformComponents +
+         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+   }
+
    ctx->Const.MaxDualSourceDrawBuffers = 1;
    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
@@ -298,11 +340,11 @@ brw_initialize_context_constants(struct brw_context *brw)
       MIN2(ctx->Const.MaxTextureCoordUnits,
            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
-   if (brw->gen >= 7)
+   if (brw->gen >= 6)
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
    else
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
-   if (getenv("INTEL_COMPUTE_SHADER")) {
+   if (_mesa_extension_override_enables.ARB_compute_shader) {
       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
       ctx->Const.MaxUniformBufferBindings += 12;
    } else {
@@ -379,6 +421,14 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.MaxDepthTextureSamples = max_samples;
    ctx->Const.MaxIntegerSamples = max_samples;
 
+   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
+    * to map indices of rectangular grid to sample numbers within a pixel.
+    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
+    * extension implementation. For more details see the comment above
+    * gen6_set_sample_maps() definition.
+    */
+   gen6_set_sample_maps(ctx);
+
    if (brw->gen >= 7)
       ctx->Const.MaxProgramTextureGatherComponents = 4;
    else if (brw->gen == 6)
@@ -386,9 +436,22 @@ brw_initialize_context_constants(struct brw_context *brw)
 
    ctx->Const.MinLineWidth = 1.0;
    ctx->Const.MinLineWidthAA = 1.0;
-   ctx->Const.MaxLineWidth = 5.0;
-   ctx->Const.MaxLineWidthAA = 5.0;
-   ctx->Const.LineWidthGranularity = 0.5;
+   if (brw->gen >= 6) {
+      ctx->Const.MaxLineWidth = 7.375;
+      ctx->Const.MaxLineWidthAA = 7.375;
+      ctx->Const.LineWidthGranularity = 0.125;
+   } else {
+      ctx->Const.MaxLineWidth = 7.0;
+      ctx->Const.MaxLineWidthAA = 7.0;
+      ctx->Const.LineWidthGranularity = 0.5;
+   }
+
+   /* For non-antialiased lines, we have to round the line width to the
+    * nearest whole number. Make sure that we don't advertise a line
+    * width that, when rounded, will be beyond the actual hardware
+    * maximum.
+    */
+   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
 
    ctx->Const.MinPointSize = 1.0;
    ctx->Const.MinPointSizeAA = 1.0;
@@ -435,6 +498,12 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
    ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 
+   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
+   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
+   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
+   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
+   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
+
    if (brw->gen >= 7) {
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
@@ -445,6 +514,18 @@ brw_initialize_context_constants(struct brw_context *brw)
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
       ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
+
+      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
+         BRW_MAX_IMAGES;
+      ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
+         (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
+      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
+         BRW_MAX_IMAGES;
+      ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
+      ctx->Const.MaxCombinedShaderOutputResources =
+         MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
+      ctx->Const.MaxImageSamples = 0;
+      ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES;
    }
 
    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
@@ -456,7 +537,25 @@ brw_initialize_context_constants(struct brw_context *brw)
       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 
    ctx->Const.NativeIntegers = true;
-   ctx->Const.UniformBooleanTrue = 1;
+   ctx->Const.VertexID_is_zero_based = true;
+
+   /* Regarding the CMP instruction, the Ivybridge PRM says:
+    *
+    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
+    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
+    *    0xFFFFFFFF) is assigned to dst."
+    *
+    * but PRMs for earlier generations say
+    *
+    *   "In dword format, one GRF may store up to 8 results. When the register
+    *    is used later as a vector of Booleans, as only LSB at each channel
+    *    contains meaning [sic] data, software should make sure all higher bits
+    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
+    *
+    * We select the representation of a true boolean uniform to be ~0, and fix
+    * the results of Gen <= 5 CMP instruction's with -(result & 1).
+    */
+   ctx->Const.UniformBooleanTrue = ~0;
 
    /* From the gen4 PRM, volume 4 page 127:
     *
@@ -469,6 +568,7 @@ brw_initialize_context_constants(struct brw_context *brw)
     */
    ctx->Const.UniformBufferOffsetAlignment = 16;
    ctx->Const.TextureBufferOffsetAlignment = 16;
+   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 
    if (brw->gen >= 6) {
       ctx->Const.MaxVarying = 32;
@@ -480,32 +580,52 @@ brw_initialize_context_constants(struct brw_context *brw)
 
    /* We want the GLSL compiler to emit code that uses condition codes */
    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
-      ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
-      ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
-      ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
-      ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
-      ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
-      ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput =
-	 (i == MESA_SHADER_FRAGMENT);
-      ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
-	 (i == MESA_SHADER_FRAGMENT);
-      ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
-      ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
+      ctx->Const.ShaderCompilerOptions[i] =
+         brw->intelScreen->compiler->glsl_compiler_options[i];
    }
 
-   ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
-   ctx->ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-
    /* ARB_viewport_array */
-   if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
-      ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
+   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
+      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
       ctx->Const.ViewportSubpixelBits = 0;
 
-      /* Cast to float before negating becuase MaxViewportWidth is unsigned.
+      /* Cast to float before negating because MaxViewportWidth is unsigned.
        */
       ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
       ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
    }
+
+   /* ARB_gpu_shader5 */
+   if (brw->gen >= 7)
+      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
+
+   /* ARB_framebuffer_no_attachments */
+   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
+   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
+   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
+   ctx->Const.MaxFramebufferSamples = max_samples;
+}
+
+static void
+brw_adjust_cs_context_constants(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   /* For ES, we set these constants based on SIMD8.
+    *
+    * TODO: Once we can always generate SIMD16, we should update this.
+    *
+    * For GL, we assume we can generate a SIMD16 program, but this currently
+    * is not always true. This allows us to run more test cases, and will be
+    * required based on desktop GL compute shader requirements.
+    */
+   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
+
+   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
+   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
+   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
+   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
+   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
 }
 
 /**
@@ -555,9 +675,6 @@ brw_process_driconf_options(struct brw_context *brw)
       brw->disable_throttling = true;
    }
 
-   brw->disable_derivative_optimization =
-      driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
-
    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 
    ctx->Const.ForceGLSLExtensionsWarn =
@@ -565,6 +682,9 @@ brw_process_driconf_options(struct brw_context *brw)
 
    ctx->Const.DisableGLSLLineContinuations =
       driQueryOptionb(options, "disable_glsl_line_continuations");
+
+   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
+      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 }
 
 GLboolean
@@ -600,7 +720,7 @@ brwCreateContext(gl_api api,
 
    struct brw_context *brw = rzalloc(NULL, struct brw_context);
    if (!brw) {
-      fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
+      fprintf(stderr, "%s: failed to alloc context\n", __func__);
       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
       return false;
    }
@@ -615,6 +735,8 @@ brwCreateContext(gl_api api,
    brw->is_g4x = devinfo->is_g4x;
    brw->is_baytrail = devinfo->is_baytrail;
    brw->is_haswell = devinfo->is_haswell;
+   brw->is_cherryview = devinfo->is_cherryview;
+   brw->is_broxton = devinfo->is_broxton;
    brw->has_llc = devinfo->has_llc;
    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
@@ -628,17 +750,20 @@ brwCreateContext(gl_api api,
    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
    brw->has_swizzling = screen->hw_has_swizzling;
 
+   brw->vs.base.stage = MESA_SHADER_VERTEX;
+   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
+   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
    if (brw->gen >= 8) {
       gen8_init_vtable_surface_functions(brw);
-      gen7_init_vtable_sampler_functions(brw);
       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
    } else if (brw->gen >= 7) {
       gen7_init_vtable_surface_functions(brw);
-      gen7_init_vtable_sampler_functions(brw);
       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
+   } else if (brw->gen >= 6) {
+      gen6_init_vtable_surface_functions(brw);
+      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
    } else {
       gen4_init_vtable_surface_functions(brw);
-      gen4_init_vtable_sampler_functions(brw);
       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
    }
 
@@ -651,7 +776,7 @@ brwCreateContext(gl_api api,
 
    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
-      fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
+      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
       intelDestroyContext(driContextPriv);
       return false;
    }
@@ -682,7 +807,10 @@ brwCreateContext(gl_api api,
    _mesa_meta_init(ctx);
 
    brw_process_driconf_options(brw);
-   brw_process_intel_debug_variable(brw);
+
+   if (INTEL_DEBUG & DEBUG_PERF)
+      brw->perf_debug = true;
+
    brw_initialize_context_constants(brw);
 
    ctx->Const.ResetStrategy = notify_reset
@@ -712,6 +840,12 @@ brwCreateContext(gl_api api,
       }
    }
 
+   if (brw_init_pipe_control(brw, devinfo)) {
+      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
+      intelDestroyContext(driContextPriv);
+      return false;
+   }
+
    brw_init_state(brw);
 
    intelInitExtensions(ctx);
@@ -719,13 +853,20 @@ brwCreateContext(gl_api api,
    brw_init_surface_formats(brw);
 
    brw->max_vs_threads = devinfo->max_vs_threads;
+   brw->max_hs_threads = devinfo->max_hs_threads;
+   brw->max_ds_threads = devinfo->max_ds_threads;
    brw->max_gs_threads = devinfo->max_gs_threads;
    brw->max_wm_threads = devinfo->max_wm_threads;
+   brw->max_cs_threads = devinfo->max_cs_threads;
    brw->urb.size = devinfo->urb.size;
    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
+   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
+   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 
+   brw_adjust_cs_context_constants(brw);
+
    /* Estimate the size of the mappable aperture into the GTT.  There's an
     * ioctl to get the whole GTT size, but not one to get the mappable subset.
     * It turns out it's basically always 256MB, though some ancient hardware
@@ -742,16 +883,18 @@ brwCreateContext(gl_api api,
    brw->max_gtt_map_object_size = gtt_size / 4;
 
    if (brw->gen == 6)
-      brw->urb.gen6_gs_previously_active = false;
+      brw->urb.gs_present = false;
 
    brw->prim_restart.in_progress = false;
    brw->prim_restart.enable_cut_index = false;
    brw->gs.enabled = false;
+   brw->sf.viewport_transform_enable = true;
 
-   if (brw->gen < 6) {
-      brw->curbe.last_buf = calloc(1, 4096);
-      brw->curbe.next_buf = calloc(1, 4096);
-   }
+   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+
+   brw->use_resource_streamer = screen->has_resource_streamer &&
+      (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
+       brw_env_var_as_boolean("INTEL_USE_GATHER", false));
 
    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
@@ -778,6 +921,9 @@ brwCreateContext(gl_api api,
       brw_init_performance_monitors(brw);
    }
 
+   vbo_use_buffer_objects(ctx);
+   vbo_always_unmap_buffers(ctx);
+
    return true;
 }
 
@@ -788,10 +934,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
       (struct brw_context *) driContextPriv->driverPrivate;
    struct gl_context *ctx = &brw->ctx;
 
-   assert(brw); /* should never be null */
-   if (!brw)
-      return;
-
    /* Dump a final BMP in case the application doesn't call SwapBuffers */
    if (INTEL_DEBUG & DEBUG_AUB) {
       intel_batchbuffer_flush(brw);
@@ -799,6 +941,7 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    }
 
    _mesa_meta_free(&brw->ctx);
+   brw_meta_fast_clear_free(brw);
 
    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
       /* Force a report. */
@@ -812,9 +955,16 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    brw_draw_destroy(brw);
 
    drm_intel_bo_unreference(brw->curbe.curbe_bo);
+   if (brw->vs.base.scratch_bo)
+      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
+   if (brw->gs.base.scratch_bo)
+      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
+   if (brw->wm.base.scratch_bo)
+      drm_intel_bo_unreference(brw->wm.base.scratch_bo);
 
-   free(brw->curbe.last_buf);
-   free(brw->curbe.next_buf);
+   gen7_reset_hw_bt_pool_offsets(brw);
+   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
+   brw->hw_bt_pool.bo = NULL;
 
    drm_intel_gem_context_destroy(brw->hw_ctx);
 
@@ -827,10 +977,13 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    if (ctx->swrast_context)
       _swrast_DestroyContext(&brw->ctx);
 
+   brw_fini_pipe_control(brw);
    intel_batchbuffer_free(brw);
 
-   drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
-   brw->first_post_swapbuffers_batch = NULL;
+   drm_intel_bo_unreference(brw->throttle_batch[1]);
+   drm_intel_bo_unreference(brw->throttle_batch[0]);
+   brw->throttle_batch[1] = NULL;
+   brw->throttle_batch[0] = NULL;
 
    driDestroyOptionCache(&brw->optionCache);
 
@@ -859,7 +1012,7 @@ intelUnbindContext(__DRIcontext * driContextPriv)
  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
  * for a visual where you're guaranteed to be capable, but it turns out that
  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
- * incapable ones, becuase there's no difference between the two in resources
+ * incapable ones, because there's no difference between the two in resources
  * used.  Applications thus get built that accidentally rely on the default
  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
  * great...
@@ -924,13 +1077,17 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
       struct gl_context *ctx = &brw->ctx;
       struct gl_framebuffer *fb, *readFb;
 
-      if (driDrawPriv == NULL && driReadPriv == NULL) {
+      if (driDrawPriv == NULL) {
          fb = _mesa_get_incomplete_framebuffer();
-         readFb = _mesa_get_incomplete_framebuffer();
       } else {
          fb = driDrawPriv->driverPrivate;
-         readFb = driReadPriv->driverPrivate;
          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
+      }
+
+      if (driReadPriv == NULL) {
+         readFb = _mesa_get_incomplete_framebuffer();
+      } else {
+         readFb = driReadPriv->driverPrivate;
          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
       }
 
@@ -1119,29 +1276,6 @@ intel_prepare_render(struct brw_context *brw)
     */
    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
       brw->front_buffer_dirty = true;
-
-   /* Wait for the swapbuffers before the one we just emitted, so we
-    * don't get too many swaps outstanding for apps that are GPU-heavy
-    * but not CPU-heavy.
-    *
-    * We're using intelDRI2Flush (called from the loader before
-    * swapbuffer) and glFlush (for front buffer rendering) as the
-    * indicator that a frame is done and then throttle when we get
-    * here as we prepare to render the next frame.  At this point for
-    * round trips for swap/copy and getting new buffers are done and
-    * we'll spend less time waiting on the GPU.
-    *
-    * Unfortunately, we don't have a handle to the batch containing
-    * the swap, and getting our hands on that doesn't seem worth it,
-    * so we just us the first batch we emitted after the last swap.
-    */
-   if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
-      if (!brw->disable_throttling)
-         drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
-      drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
-      brw->first_post_swapbuffers_batch = NULL;
-      brw->need_throttle = false;
-   }
 }
 
 /**
@@ -1265,7 +1399,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
 	* name, then drm_intel_bo_flink() is a low-cost getter.  It does not
 	* create a new name.
 	*/
-      drm_intel_bo_flink(last_mt->region->bo, &old_name);
+      drm_intel_bo_flink(last_mt->bo, &old_name);
    }
 
    if (old_name == buffer->name)
@@ -1346,7 +1480,7 @@ intel_update_image_buffer(struct brw_context *intel,
    else
       last_mt = rb->singlesample_mt;
 
-   if (last_mt && last_mt->region->bo == buffer->bo)
+   if (last_mt && last_mt->bo == buffer->bo)
       return;
 
    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,