i965: Try to avoid stalls on the GPU when doing glBufferSubData().
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
index 04045a584a572b591815e63dddd18d968b9f533c..cb82fb042ef39411f99c9097b4f278c25997fbd0 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
  develop this 3D driver.
 
 #include "main/api_exec.h"
 #include "main/context.h"
+#include "main/fbobject.h"
 #include "main/imports.h"
 #include "main/macros.h"
 #include "main/points.h"
-#include "main/simple_list.h"
 #include "main/version.h"
 #include "main/vtxfmt.h"
 
 #include "vbo/vbo_context.h"
 
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+#include "utils.h"
+
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_draw.h"
 #include "brw_state.h"
 
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+#include "intel_buffers.h"
 #include "intel_fbo.h"
 #include "intel_mipmap_tree.h"
+#include "intel_pixel.h"
 #include "intel_regions.h"
 #include "intel_tex.h"
 #include "intel_tex_obj.h"
 
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
 #include "glsl/ralloc.h"
 
@@ -83,21 +94,163 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
    }
 }
 
-static void brwInitDriverFunctions(struct intel_screen *screen,
-                                  struct dd_function_table *functions)
+static const GLubyte *
+intelGetString(struct gl_context * ctx, GLenum name)
+{
+   const struct brw_context *const brw = brw_context(ctx);
+   const char *chipset;
+   static char buffer[128];
+
+   switch (name) {
+   case GL_VENDOR:
+      return (GLubyte *) "Intel Open Source Technology Center";
+      break;
+
+   case GL_RENDERER:
+      switch (brw->intelScreen->deviceID) {
+#undef CHIPSET
+#define CHIPSET(id, family, str) case id: chipset = str; break;
+#include "pci_ids/i965_pci_ids.h"
+      default:
+         chipset = "Unknown Intel Chipset";
+         break;
+      }
+
+      (void) driGetRendererString(buffer, chipset, 0);
+      return (GLubyte *) buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+static void
+intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+   struct brw_context *brw = brw_context(ctx);
+   __DRIcontext *driContext = brw->driContext;
+
+   if (brw->saved_viewport)
+      brw->saved_viewport(ctx, x, y, w, h);
+
+   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+      dri2InvalidateDrawable(driContext->driDrawablePriv);
+      dri2InvalidateDrawable(driContext->driReadablePriv);
+   }
+}
+
+static void
+intelInvalidateState(struct gl_context * ctx, GLuint new_state)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   if (ctx->swrast_context)
+      _swrast_InvalidateState(ctx, new_state);
+   _vbo_InvalidateState(ctx, new_state);
+
+   brw->NewGLState |= new_state;
+}
+
+static void
+intel_flush_front(struct gl_context *ctx)
+{
+   struct brw_context *brw = brw_context(ctx);
+   __DRIcontext *driContext = brw->driContext;
+   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
+   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
+
+   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+      if (screen->dri2.loader->flushFrontBuffer != NULL &&
+          driDrawable &&
+          driDrawable->loaderPrivate) {
+
+         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
+          *
+          * This potentially resolves both front and back buffer. It
+          * is unnecessary to resolve the back, but harms nothing except
+          * performance. And no one cares about front-buffer render
+          * performance.
+          */
+         intel_resolve_for_dri2_flush(brw, driDrawable);
+         intel_batchbuffer_flush(brw);
+
+         screen->dri2.loader->flushFrontBuffer(driDrawable,
+                                               driDrawable->loaderPrivate);
+
+         /* We set the dirty bit in intel_prepare_render() if we're
+          * front buffer rendering once we get there.
+          */
+         brw->front_buffer_dirty = false;
+      }
+   }
+}
+
+static void
+intel_glFlush(struct gl_context *ctx)
 {
-   intelInitDriverFunctions( functions );
+   struct brw_context *brw = brw_context(ctx);
+
+   intel_batchbuffer_flush(brw);
+   intel_flush_front(ctx);
+   if (brw->is_front_buffer_rendering)
+      brw->need_throttle = true;
+}
+
+void
+intelFinish(struct gl_context * ctx)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   intel_glFlush(ctx);
+
+   if (brw->batch.last_bo)
+      drm_intel_bo_wait_rendering(brw->batch.last_bo);
+}
+
+static void
+brw_init_driver_functions(struct brw_context *brw,
+                          struct dd_function_table *functions)
+{
+   _mesa_init_driver_functions(functions);
+
+   /* GLX uses DRI2 invalidate events to handle window resizing.
+    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
+    * which doesn't provide a mechanism for snooping the event queues.
+    *
+    * So EGL still relies on viewport hacks to handle window resizing.
+    * This should go away with DRI3000.
+    */
+   if (!brw->driContext->driScreenPriv->dri2.useInvalidate) {
+      brw->saved_viewport = functions->Viewport;
+      functions->Viewport = intel_viewport;
+   }
+
+   functions->Flush = intel_glFlush;
+   functions->Finish = intelFinish;
+   functions->GetString = intelGetString;
+   functions->UpdateState = intelInvalidateState;
+
+   intelInitTextureFuncs(functions);
+   intelInitTextureImageFuncs(functions);
+   intelInitTextureSubImageFuncs(functions);
+   intelInitTextureCopyImageFuncs(functions);
+   intelInitClearFuncs(functions);
+   intelInitBufferFuncs(functions);
+   intelInitPixelFuncs(functions);
+   intelInitBufferObjectFuncs(functions);
+   intel_init_syncobj_functions(functions);
+   brw_init_object_purgeable_functions(functions);
 
    brwInitFragProgFuncs( functions );
    brw_init_common_queryobj_functions(functions);
-   if (screen->devinfo->gen >= 6)
+   if (brw->gen >= 6)
       gen6_init_queryobj_functions(functions);
    else
       gen4_init_queryobj_functions(functions);
 
    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 
-   if (screen->devinfo->gen >= 7) {
+   if (brw->gen >= 7) {
       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
       functions->EndTransformFeedback = gen7_end_transform_feedback;
    } else {
@@ -105,7 +258,7 @@ static void brwInitDriverFunctions(struct intel_screen *screen,
       functions->EndTransformFeedback = brw_end_transform_feedback;
    }
 
-   if (screen->devinfo->gen >= 6)
+   if (brw->gen >= 6)
       functions->GetSamplePosition = gen6_get_sample_position;
 }
 
@@ -241,7 +394,18 @@ brw_initialize_context_constants(struct brw_context *brw)
 
    ctx->Const.NativeIntegers = true;
    ctx->Const.UniformBooleanTrue = 1;
+
+   /* From the gen4 PRM, volume 4 page 127:
+    *
+    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
+    *      the base address of the first element of the surface, computed in
+    *      software by adding the surface base address to the byte offset of
+    *      the element in the buffer."
+    *
+    * However, unaligned accesses are slower, so enforce buffer alignment.
+    */
    ctx->Const.UniformBufferOffsetAlignment = 16;
+   ctx->Const.TextureBufferOffsetAlignment = 16;
 
    if (brw->gen >= 6) {
       ctx->Const.MaxVarying = 32;
@@ -376,9 +540,17 @@ brwCreateContext(gl_api api,
    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
    brw->has_swizzling = screen->hw_has_swizzling;
 
-   brwInitVtbl( brw );
+   if (brw->gen >= 7) {
+      gen7_init_vtable_surface_functions(brw);
+      gen7_init_vtable_sampler_functions(brw);
+      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
+   } else {
+      gen4_init_vtable_surface_functions(brw);
+      gen4_init_vtable_sampler_functions(brw);
+      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
+   }
 
-   brwInitDriverFunctions(screen, &functions);
+   brw_init_driver_functions(brw, &functions);
 
    struct gl_context *ctx = &brw->ctx;
 
@@ -394,21 +566,42 @@ brwCreateContext(gl_api api,
       return false;
    }
 
-   brw_process_driconf_options(brw);
+   /* Initialize the software rasterizer and helper modules.
+    *
+    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
+    * software fallbacks (which we have to support on legacy GL to do weird
+    * glDrawPixels(), glBitmap(), and other functions).
+    */
+   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
+      _swrast_CreateContext(ctx);
+   }
 
-   if (!intelInitContext( brw, api, major_version, minor_version,
-                          mesaVis, driContextPriv,
-                         sharedContextPrivate, &functions,
-                         dri_ctx_error)) {
-      intelDestroyContext(driContextPriv);
-      return false;
+   _vbo_CreateContext(ctx);
+   if (ctx->swrast_context) {
+      _tnl_CreateContext(ctx);
+      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+      _swsetup_CreateContext(ctx);
+
+      /* Configure swrast to match hardware characteristics: */
+      _swrast_allow_pixel_fog(ctx, false);
+      _swrast_allow_vertex_fog(ctx, true);
    }
 
+   _mesa_meta_init(ctx);
+
+   brw_process_driconf_options(brw);
+   brw_process_intel_debug_variable(brw);
    brw_initialize_context_constants(brw);
 
    /* Reinitialize the context point state.  It depends on ctx->Const values. */
    _mesa_init_point(ctx);
 
+   intelInitExtensions(ctx);
+
+   intel_batchbuffer_init(brw);
+
+   intel_fbo_init(brw);
+
    if (brw->gen >= 6) {
       /* Create a new hardware context.  Using a hardware context means that
        * our GPU state will be saved/restored on context switch, allowing us
@@ -428,19 +621,9 @@ brwCreateContext(gl_api api,
 
    brw_init_surface_formats(brw);
 
-   /* Initialize swrast, tnl driver tables: */
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   if (tnl)
-      tnl->Driver.RunPipeline = _tnl_run_pipeline;
-
-   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
-   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
-   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
-
    if (brw->is_g4x || brw->gen >= 5) {
       brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
-      brw->has_aa_line_parameters = true;
   } else {
       brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
@@ -454,6 +637,21 @@ brwCreateContext(gl_api api,
    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 
+   /* Estimate the size of the mappable aperture into the GTT.  There's an
+    * ioctl to get the whole GTT size, but not one to get the mappable subset.
+    * It turns out it's basically always 256MB, though some ancient hardware
+    * was smaller.
+    */
+   uint32_t gtt_size = 256 * 1024 * 1024;
+
+   /* We don't want to map two objects such that a memcpy between them would
+    * just fault one mapping in and then the other over and over forever.  So
+    * we would need to divide the GTT size by 2.  Additionally, some GTT is
+    * taken up by things like the framebuffer and the ringbuffer and such, so
+    * be more conservative.
+    */
+   brw->max_gtt_map_object_size = gtt_size / 4;
+
    if (brw->gen == 6)
       brw->urb.gen6_gs_previously_active = false;
 
@@ -467,18 +665,6 @@ brwCreateContext(gl_api api,
       brw->curbe.next_buf = calloc(1, 4096);
    }
 
-   brw->state.dirty.mesa = ~0;
-   brw->state.dirty.brw = ~0;
-
-   /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
-    * dirty flags.
-    */
-   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));
-
-   brw->emit_state_always = 0;
-
-   brw->batch.need_workaround_flush = true;
-
    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 
@@ -507,3 +693,485 @@ brwCreateContext(gl_api api,
    return true;
 }
 
+void
+intelDestroyContext(__DRIcontext * driContextPriv)
+{
+   struct brw_context *brw =
+      (struct brw_context *) driContextPriv->driverPrivate;
+   struct gl_context *ctx = &brw->ctx;
+
+   assert(brw); /* should never be null */
+   if (!brw)
+      return;
+
+   /* Dump a final BMP in case the application doesn't call SwapBuffers */
+   if (INTEL_DEBUG & DEBUG_AUB) {
+      intel_batchbuffer_flush(brw);
+      aub_dump_bmp(&brw->ctx);
+   }
+
+   _mesa_meta_free(&brw->ctx);
+
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+      /* Force a report. */
+      brw->shader_time.report_time = 0;
+
+      brw_collect_and_report_shader_time(brw);
+      brw_destroy_shader_time(brw);
+   }
+
+   brw_destroy_state(brw);
+   brw_draw_destroy(brw);
+
+   drm_intel_bo_unreference(brw->curbe.curbe_bo);
+   drm_intel_bo_unreference(brw->vs.base.const_bo);
+   drm_intel_bo_unreference(brw->wm.base.const_bo);
+
+   free(brw->curbe.last_buf);
+   free(brw->curbe.next_buf);
+
+   drm_intel_gem_context_destroy(brw->hw_ctx);
+
+   if (ctx->swrast_context) {
+      _swsetup_DestroyContext(&brw->ctx);
+      _tnl_DestroyContext(&brw->ctx);
+   }
+   _vbo_DestroyContext(&brw->ctx);
+
+   if (ctx->swrast_context)
+      _swrast_DestroyContext(&brw->ctx);
+
+   intel_batchbuffer_free(brw);
+
+   drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
+   brw->first_post_swapbuffers_batch = NULL;
+
+   driDestroyOptionCache(&brw->optionCache);
+
+   /* free the Mesa context */
+   _mesa_free_context_data(&brw->ctx);
+
+   ralloc_free(brw);
+   driContextPriv->driverPrivate = NULL;
+}
+
+GLboolean
+intelUnbindContext(__DRIcontext * driContextPriv)
+{
+   /* Unset current context and dispath table */
+   _mesa_make_current(NULL, NULL, NULL);
+
+   return true;
+}
+
+/**
+ * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
+ * on window system framebuffers.
+ *
+ * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
+ * your renderbuffer can do sRGB encode, and you can flip a switch that does
+ * sRGB encode if the renderbuffer can handle it.  You can ask specifically
+ * for a visual where you're guaranteed to be capable, but it turns out that
+ * everyone just makes all their ARGB8888 visuals capable and doesn't offer
+ * incapable ones, becuase there's no difference between the two in resources
+ * used.  Applications thus get built that accidentally rely on the default
+ * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
+ * great...
+ *
+ * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
+ * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
+ * So they removed the enable knob and made it "if the renderbuffer is sRGB
+ * capable, do sRGB encode".  Then, for your window system renderbuffers, you
+ * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
+ * and get no sRGB encode (assuming that both kinds of visual are available).
+ * Thus our choice to support sRGB by default on our visuals for desktop would
+ * result in broken rendering of GLES apps that aren't expecting sRGB encode.
+ *
+ * Unfortunately, renderbuffer setup happens before a context is created.  So
+ * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
+ * context (without an sRGB visual, though we don't have sRGB visuals exposed
+ * yet), we go turn that back off before anyone finds out.
+ */
+static void
+intel_gles3_srgb_workaround(struct brw_context *brw,
+                            struct gl_framebuffer *fb)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
+      return;
+
+   /* Some day when we support the sRGB capable bit on visuals available for
+    * GLES, we'll need to respect that and not disable things here.
+    */
+   fb->Visual.sRGBCapable = false;
+   for (int i = 0; i < BUFFER_COUNT; i++) {
+      if (fb->Attachment[i].Renderbuffer &&
+          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_SARGB8) {
+         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_ARGB8888;
+      }
+   }
+}
+
+GLboolean
+intelMakeCurrent(__DRIcontext * driContextPriv,
+                 __DRIdrawable * driDrawPriv,
+                 __DRIdrawable * driReadPriv)
+{
+   struct brw_context *brw;
+   GET_CURRENT_CONTEXT(curCtx);
+
+   if (driContextPriv)
+      brw = (struct brw_context *) driContextPriv->driverPrivate;
+   else
+      brw = NULL;
+
+   /* According to the glXMakeCurrent() man page: "Pending commands to
+    * the previous context, if any, are flushed before it is released."
+    * But only flush if we're actually changing contexts.
+    */
+   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
+      _mesa_flush(curCtx);
+   }
+
+   if (driContextPriv) {
+      struct gl_context *ctx = &brw->ctx;
+      struct gl_framebuffer *fb, *readFb;
+
+      if (driDrawPriv == NULL && driReadPriv == NULL) {
+         fb = _mesa_get_incomplete_framebuffer();
+         readFb = _mesa_get_incomplete_framebuffer();
+      } else {
+         fb = driDrawPriv->driverPrivate;
+         readFb = driReadPriv->driverPrivate;
+         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
+         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
+      }
+
+      /* The sRGB workaround changes the renderbuffer's format. We must change
+       * the format before the renderbuffer's miptree get's allocated, otherwise
+       * the formats of the renderbuffer and its miptree will differ.
+       */
+      intel_gles3_srgb_workaround(brw, fb);
+      intel_gles3_srgb_workaround(brw, readFb);
+
+      intel_prepare_render(brw);
+      _mesa_make_current(ctx, fb, readFb);
+   } else {
+      _mesa_make_current(NULL, NULL, NULL);
+   }
+
+   return true;
+}
+
+void
+intel_resolve_for_dri2_flush(struct brw_context *brw,
+                             __DRIdrawable *drawable)
+{
+   if (brw->gen < 6) {
+      /* MSAA and fast color clear are not supported, so don't waste time
+       * checking whether a resolve is needed.
+       */
+      return;
+   }
+
+   struct gl_framebuffer *fb = drawable->driverPrivate;
+   struct intel_renderbuffer *rb;
+
+   /* Usually, only the back buffer will need to be downsampled. However,
+    * the front buffer will also need it if the user has rendered into it.
+    */
+   static const gl_buffer_index buffers[2] = {
+         BUFFER_BACK_LEFT,
+         BUFFER_FRONT_LEFT,
+   };
+
+   for (int i = 0; i < 2; ++i) {
+      rb = intel_get_renderbuffer(fb, buffers[i]);
+      if (rb == NULL || rb->mt == NULL)
+         continue;
+      if (rb->mt->num_samples <= 1)
+         intel_miptree_resolve_color(brw, rb->mt);
+      else
+         intel_miptree_downsample(brw, rb->mt);
+   }
+}
+
+static unsigned
+intel_bits_per_pixel(const struct intel_renderbuffer *rb)
+{
+   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
+}
+
+static void
+intel_query_dri2_buffers(struct brw_context *brw,
+                         __DRIdrawable *drawable,
+                         __DRIbuffer **buffers,
+                         int *count);
+
+static void
+intel_process_dri2_buffer(struct brw_context *brw,
+                          __DRIdrawable *drawable,
+                          __DRIbuffer *buffer,
+                          struct intel_renderbuffer *rb,
+                          const char *buffer_name);
+
+void
+intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
+{
+   struct gl_framebuffer *fb = drawable->driverPrivate;
+   struct intel_renderbuffer *rb;
+   struct brw_context *brw = context->driverPrivate;
+   __DRIbuffer *buffers = NULL;
+   int i, count;
+   const char *region_name;
+
+   /* Set this up front, so that in case our buffers get invalidated
+    * while we're getting new buffers, we don't clobber the stamp and
+    * thus ignore the invalidate. */
+   drawable->lastStamp = drawable->dri2.stamp;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
+      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
+
+   intel_query_dri2_buffers(brw, drawable, &buffers, &count);
+
+   if (buffers == NULL)
+      return;
+
+   for (i = 0; i < count; i++) {
+       switch (buffers[i].attachment) {
+       case __DRI_BUFFER_FRONT_LEFT:
+           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
+           region_name = "dri2 front buffer";
+           break;
+
+       case __DRI_BUFFER_FAKE_FRONT_LEFT:
+           rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
+           region_name = "dri2 fake front buffer";
+           break;
+
+       case __DRI_BUFFER_BACK_LEFT:
+           rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
+           region_name = "dri2 back buffer";
+           break;
+
+       case __DRI_BUFFER_DEPTH:
+       case __DRI_BUFFER_HIZ:
+       case __DRI_BUFFER_DEPTH_STENCIL:
+       case __DRI_BUFFER_STENCIL:
+       case __DRI_BUFFER_ACCUM:
+       default:
+           fprintf(stderr,
+                   "unhandled buffer attach event, attachment type %d\n",
+                   buffers[i].attachment);
+           return;
+       }
+
+       intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
+   }
+
+   driUpdateFramebufferSize(&brw->ctx, drawable);
+}
+
+/**
+ * intel_prepare_render should be called anywhere that curent read/drawbuffer
+ * state is required.
+ */
+void
+intel_prepare_render(struct brw_context *brw)
+{
+   __DRIcontext *driContext = brw->driContext;
+   __DRIdrawable *drawable;
+
+   drawable = driContext->driDrawablePriv;
+   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+      if (drawable->lastStamp != drawable->dri2.stamp)
+         intel_update_renderbuffers(driContext, drawable);
+      driContext->dri2.draw_stamp = drawable->dri2.stamp;
+   }
+
+   drawable = driContext->driReadablePriv;
+   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
+      if (drawable->lastStamp != drawable->dri2.stamp)
+         intel_update_renderbuffers(driContext, drawable);
+      driContext->dri2.read_stamp = drawable->dri2.stamp;
+   }
+
+   /* If we're currently rendering to the front buffer, the rendering
+    * that will happen next will probably dirty the front buffer.  So
+    * mark it as dirty here.
+    */
+   if (brw->is_front_buffer_rendering)
+      brw->front_buffer_dirty = true;
+
+   /* Wait for the swapbuffers before the one we just emitted, so we
+    * don't get too many swaps outstanding for apps that are GPU-heavy
+    * but not CPU-heavy.
+    *
+    * We're using intelDRI2Flush (called from the loader before
+    * swapbuffer) and glFlush (for front buffer rendering) as the
+    * indicator that a frame is done and then throttle when we get
+    * here as we prepare to render the next frame.  At this point for
+    * round trips for swap/copy and getting new buffers are done and
+    * we'll spend less time waiting on the GPU.
+    *
+    * Unfortunately, we don't have a handle to the batch containing
+    * the swap, and getting our hands on that doesn't seem worth it,
+    * so we just us the first batch we emitted after the last swap.
+    */
+   if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
+      if (!brw->disable_throttling)
+         drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
+      drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
+      brw->first_post_swapbuffers_batch = NULL;
+      brw->need_throttle = false;
+   }
+}
+
+/**
+ * \brief Query DRI2 to obtain a DRIdrawable's buffers.
+ *
+ * To determine which DRI buffers to request, examine the renderbuffers
+ * attached to the drawable's framebuffer. Then request the buffers with
+ * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
+ *
+ * This is called from intel_update_renderbuffers().
+ *
+ * \param drawable      Drawable whose buffers are queried.
+ * \param buffers       [out] List of buffers returned by DRI2 query.
+ * \param buffer_count  [out] Number of buffers returned.
+ *
+ * \see intel_update_renderbuffers()
+ * \see DRI2GetBuffers()
+ * \see DRI2GetBuffersWithFormat()
+ */
+static void
+intel_query_dri2_buffers(struct brw_context *brw,
+                         __DRIdrawable *drawable,
+                         __DRIbuffer **buffers,
+                         int *buffer_count)
+{
+   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+   struct gl_framebuffer *fb = drawable->driverPrivate;
+   int i = 0;
+   unsigned attachments[8];
+
+   struct intel_renderbuffer *front_rb;
+   struct intel_renderbuffer *back_rb;
+
+   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
+   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
+
+   memset(attachments, 0, sizeof(attachments));
+   if ((brw->is_front_buffer_rendering ||
+        brw->is_front_buffer_reading ||
+        !back_rb) && front_rb) {
+      /* If a fake front buffer is in use, then querying for
+       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
+       * the real front buffer to the fake front buffer.  So before doing the
+       * query, we need to make sure all the pending drawing has landed in the
+       * real front buffer.
+       */
+      intel_batchbuffer_flush(brw);
+      intel_flush_front(&brw->ctx);
+
+      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+      attachments[i++] = intel_bits_per_pixel(front_rb);
+   } else if (front_rb && brw->front_buffer_dirty) {
+      /* We have pending front buffer rendering, but we aren't querying for a
+       * front buffer.  If the front buffer we have is a fake front buffer,
+       * the X server is going to throw it away when it processes the query.
+       * So before doing the query, make sure all the pending drawing has
+       * landed in the real front buffer.
+       */
+      intel_batchbuffer_flush(brw);
+      intel_flush_front(&brw->ctx);
+   }
+
+   if (back_rb) {
+      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+      attachments[i++] = intel_bits_per_pixel(back_rb);
+   }
+
+   assert(i <= ARRAY_SIZE(attachments));
+
+   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
+                                                        &drawable->w,
+                                                        &drawable->h,
+                                                        attachments, i / 2,
+                                                        buffer_count,
+                                                        drawable->loaderPrivate);
+}
+
+/**
+ * \brief Assign a DRI buffer's DRM region to a renderbuffer.
+ *
+ * This is called from intel_update_renderbuffers().
+ *
+ * \par Note:
+ *    DRI buffers whose attachment point is DRI2BufferStencil or
+ *    DRI2BufferDepthStencil are handled as special cases.
+ *
+ * \param buffer_name is a human readable name, such as "dri2 front buffer",
+ *        that is passed to intel_region_alloc_for_handle().
+ *
+ * \see intel_update_renderbuffers()
+ * \see intel_region_alloc_for_handle()
+ */
+static void
+intel_process_dri2_buffer(struct brw_context *brw,
+                          __DRIdrawable *drawable,
+                          __DRIbuffer *buffer,
+                          struct intel_renderbuffer *rb,
+                          const char *buffer_name)
+{
+   struct intel_region *region = NULL;
+
+   if (!rb)
+      return;
+
+   unsigned num_samples = rb->Base.Base.NumSamples;
+
+   /* We try to avoid closing and reopening the same BO name, because the first
+    * use of a mapping of the buffer involves a bunch of page faulting which is
+    * moderately expensive.
+    */
+   if (num_samples == 0) {
+       if (rb->mt &&
+           rb->mt->region &&
+           rb->mt->region->name == buffer->name)
+          return;
+   } else {
+       if (rb->mt &&
+           rb->mt->singlesample_mt &&
+           rb->mt->singlesample_mt->region &&
+           rb->mt->singlesample_mt->region->name == buffer->name)
+          return;
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
+      fprintf(stderr,
+              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
+              buffer->name, buffer->attachment,
+              buffer->cpp, buffer->pitch);
+   }
+
+   intel_miptree_release(&rb->mt);
+   region = intel_region_alloc_for_handle(brw->intelScreen,
+                                          buffer->cpp,
+                                          drawable->w,
+                                          drawable->h,
+                                          buffer->pitch,
+                                          buffer->name,
+                                          buffer_name);
+   if (!region)
+      return;
+
+   rb->mt = intel_miptree_create_for_dri2_buffer(brw,
+                                                 buffer->attachment,
+                                                 intel_rb_format(rb),
+                                                 num_samples,
+                                                 region);
+   intel_region_release(&region);
+}