src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keith@tungstengraphics.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/imports.h"
  38 #include "main/macros.h"
  39 #include "main/points.h"
  40 #include "main/version.h"
  41 #include "main/vtxfmt.h"
  42
  43 #include "vbo/vbo_context.h"
  44
  45 #include "drivers/common/driverfuncs.h"
  46 #include "drivers/common/meta.h"
  47 #include "utils.h"
  48
  49 #include "brw_context.h"
  50 #include "brw_defines.h"
  51 #include "brw_draw.h"
  52 #include "brw_state.h"
  53
  54 #include "intel_batchbuffer.h"
  55 #include "intel_buffer_objects.h"
  56 #include "intel_buffers.h"
  57 #include "intel_fbo.h"
  58 #include "intel_mipmap_tree.h"
  59 #include "intel_pixel.h"
  60 #include "intel_regions.h"
  61 #include "intel_tex.h"
  62 #include "intel_tex_obj.h"
  63
  64 #include "swrast_setup/swrast_setup.h"
  65 #include "tnl/tnl.h"
  66 #include "tnl/t_pipeline.h"
  67 #include "glsl/ralloc.h"
  68
  69 /***************************************
  70  * Mesa's Driver Functions
  71  ***************************************/
  72
  73 static size_t
  74 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  75                              GLenum internalFormat, int samples[16])
  76 {
  77    struct brw_context *brw = brw_context(ctx);
  78
  79    (void) target;
  80
  81    switch (brw->gen) {
  82    case 7:
  83       samples[0] = 8;
  84       samples[1] = 4;
  85       return 2;
  86
  87    case 6:
  88       samples[0] = 4;
  89       return 1;
  90
  91    default:
  92       samples[0] = 1;
  93       return 1;
  94    }
  95 }
  96
  97 const char *const brw_vendor_string = "Intel Open Source Technology Center";
  98
  99 const char *
 100 brw_get_renderer_string(unsigned deviceID)
 101 {
 102    const char *chipset;
 103    static char buffer[128];
 104
 105    switch (deviceID) {
 106 #undef CHIPSET
 107 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
 108 #include "pci_ids/i965_pci_ids.h"
 109    default:
 110       chipset = "Unknown Intel Chipset";
 111       break;
 112    }
 113
 114    (void) driGetRendererString(buffer, chipset, 0);
 115    return buffer;
 116 }
 117
 118 static const GLubyte *
 119 intelGetString(struct gl_context * ctx, GLenum name)
 120 {
 121    const struct brw_context *const brw = brw_context(ctx);
 122
 123    switch (name) {
 124    case GL_VENDOR:
 125       return (GLubyte *) brw_vendor_string;
 126
 127    case GL_RENDERER:
 128       return
 129          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 130
 131    default:
 132       return NULL;
 133    }
 134 }
 135
 136 static void
 137 intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
 138 {
 139    struct brw_context *brw = brw_context(ctx);
 140    __DRIcontext *driContext = brw->driContext;
 141
 142    (void) x;
 143    (void) y;
 144    (void) w;
 145    (void) h;
 146
 147    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 148       dri2InvalidateDrawable(driContext->driDrawablePriv);
 149       dri2InvalidateDrawable(driContext->driReadablePriv);
 150    }
 151 }
 152
 153 static void
 154 intelInvalidateState(struct gl_context * ctx, GLuint new_state)
 155 {
 156    struct brw_context *brw = brw_context(ctx);
 157
 158    if (ctx->swrast_context)
 159       _swrast_InvalidateState(ctx, new_state);
 160    _vbo_InvalidateState(ctx, new_state);
 161
 162    brw->NewGLState |= new_state;
 163 }
 164
 165 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 166
 167 static void
 168 intel_flush_front(struct gl_context *ctx)
 169 {
 170    struct brw_context *brw = brw_context(ctx);
 171    __DRIcontext *driContext = brw->driContext;
 172    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 173    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 174
 175    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 176       if (flushFront(screen) && driDrawable &&
 177           driDrawable->loaderPrivate) {
 178
 179          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 180           *
 181           * This potentially resolves both front and back buffer. It
 182           * is unnecessary to resolve the back, but harms nothing except
 183           * performance. And no one cares about front-buffer render
 184           * performance.
 185           */
 186          intel_resolve_for_dri2_flush(brw, driDrawable);
 187          intel_batchbuffer_flush(brw);
 188
 189          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 190
 191          /* We set the dirty bit in intel_prepare_render() if we're
 192           * front buffer rendering once we get there.
 193           */
 194          brw->front_buffer_dirty = false;
 195       }
 196    }
 197 }
 198
 199 static void
 200 intel_glFlush(struct gl_context *ctx)
 201 {
 202    struct brw_context *brw = brw_context(ctx);
 203
 204    intel_batchbuffer_flush(brw);
 205    intel_flush_front(ctx);
 206    if (brw->is_front_buffer_rendering)
 207       brw->need_throttle = true;
 208 }
 209
 210 void
 211 intelFinish(struct gl_context * ctx)
 212 {
 213    struct brw_context *brw = brw_context(ctx);
 214
 215    intel_glFlush(ctx);
 216
 217    if (brw->batch.last_bo)
 218       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 219 }
 220
 221 static void
 222 brw_init_driver_functions(struct brw_context *brw,
 223                           struct dd_function_table *functions)
 224 {
 225    _mesa_init_driver_functions(functions);
 226
 227    /* GLX uses DRI2 invalidate events to handle window resizing.
 228     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 229     * which doesn't provide a mechanism for snooping the event queues.
 230     *
 231     * So EGL still relies on viewport hacks to handle window resizing.
 232     * This should go away with DRI3000.
 233     */
 234    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 235       functions->Viewport = intel_viewport;
 236
 237    functions->Flush = intel_glFlush;
 238    functions->Finish = intelFinish;
 239    functions->GetString = intelGetString;
 240    functions->UpdateState = intelInvalidateState;
 241
 242    intelInitTextureFuncs(functions);
 243    intelInitTextureImageFuncs(functions);
 244    intelInitTextureSubImageFuncs(functions);
 245    intelInitTextureCopyImageFuncs(functions);
 246    intelInitClearFuncs(functions);
 247    intelInitBufferFuncs(functions);
 248    intelInitPixelFuncs(functions);
 249    intelInitBufferObjectFuncs(functions);
 250    intel_init_syncobj_functions(functions);
 251    brw_init_object_purgeable_functions(functions);
 252
 253    brwInitFragProgFuncs( functions );
 254    brw_init_common_queryobj_functions(functions);
 255    if (brw->gen >= 6)
 256       gen6_init_queryobj_functions(functions);
 257    else
 258       gen4_init_queryobj_functions(functions);
 259
 260    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 261
 262    functions->NewTransformFeedback = brw_new_transform_feedback;
 263    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 264    functions->GetTransformFeedbackVertexCount =
 265       brw_get_transform_feedback_vertex_count;
 266    if (brw->gen >= 7) {
 267       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 268       functions->EndTransformFeedback = gen7_end_transform_feedback;
 269       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 270       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 271    } else {
 272       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 273       functions->EndTransformFeedback = brw_end_transform_feedback;
 274    }
 275
 276    if (brw->gen >= 6)
 277       functions->GetSamplePosition = gen6_get_sample_position;
 278 }
 279
 280 /**
 281  * Return array of MSAA modes supported by the hardware. The array is
 282  * terminated by -1 and sorted in decreasing order.
 283  */
 284 static const int*
 285 brw_supported_msaa_modes(const struct brw_context *brw)
 286 {
 287    static const int gen7_samples[] = {8, 4, 0, -1};
 288    static const int gen6_samples[] = {4, 0, -1};
 289    static const int gen4_samples[] = {0, -1};
 290
 291    if (brw->gen >= 7) {
 292       return gen7_samples;
 293    } else if (brw->gen == 6) {
 294       return gen6_samples;
 295    } else {
 296       return gen4_samples;
 297    }
 298 }
 299
 300 /**
 301  * Override GL_MAX_SAMPLES and related constants according to value of driconf
 302  * option 'clamp_max_samples'.
 303  */
 304 static void
 305 brw_override_max_samples(struct brw_context *brw)
 306 {
 307    const int clamp_max_samples = driQueryOptioni(&brw->optionCache,
 308                                                  "clamp_max_samples");
 309    if (clamp_max_samples < 0)
 310       return;
 311
 312    const int *supported_msaa_modes = brw_supported_msaa_modes(brw);
 313    int max_samples = 0;
 314
 315    /* Select the largest supported MSAA mode that does not exceed
 316     * clamp_max_samples.
 317     */
 318    for (int i = 0; supported_msaa_modes[i] != -1; ++i) {
 319       if (supported_msaa_modes[i] <= clamp_max_samples) {
 320          max_samples = supported_msaa_modes[i];
 321          break;
 322       }
 323    }
 324
 325    brw->ctx.Const.MaxSamples = max_samples;
 326    brw->ctx.Const.MaxColorTextureSamples = max_samples;
 327    brw->ctx.Const.MaxDepthTextureSamples = max_samples;
 328    brw->ctx.Const.MaxIntegerSamples = max_samples;
 329 }
 330
 331 static void
 332 brw_initialize_context_constants(struct brw_context *brw)
 333 {
 334    struct gl_context *ctx = &brw->ctx;
 335
 336    ctx->Const.QueryCounterBits.Timestamp = 36;
 337
 338    ctx->Const.StripTextureBorder = true;
 339
 340    ctx->Const.MaxDualSourceDrawBuffers = 1;
 341    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 342    ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
 343    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 344    ctx->Const.MaxTextureUnits =
 345       MIN2(ctx->Const.MaxTextureCoordUnits,
 346            ctx->Const.FragmentProgram.MaxTextureImageUnits);
 347    ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
 348    if (brw->gen >= 7)
 349       ctx->Const.GeometryProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
 350    else
 351       ctx->Const.GeometryProgram.MaxTextureImageUnits = 0;
 352    ctx->Const.MaxCombinedTextureImageUnits =
 353       ctx->Const.VertexProgram.MaxTextureImageUnits +
 354       ctx->Const.FragmentProgram.MaxTextureImageUnits +
 355       ctx->Const.GeometryProgram.MaxTextureImageUnits;
 356
 357    ctx->Const.MaxTextureLevels = 14; /* 8192 */
 358    if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
 359       ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
 360    ctx->Const.Max3DTextureLevels = 9;
 361    ctx->Const.MaxCubeTextureLevels = 12;
 362
 363    if (brw->gen >= 7)
 364       ctx->Const.MaxArrayTextureLayers = 2048;
 365    else
 366       ctx->Const.MaxArrayTextureLayers = 512;
 367
 368    ctx->Const.MaxTextureRectSize = 1 << 12;
 369
 370    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 371
 372    ctx->Const.MaxRenderbufferSize = 8192;
 373
 374    /* Hardware only supports a limited number of transform feedback buffers.
 375     * So we need to override the Mesa default (which is based only on software
 376     * limits).
 377     */
 378    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 379
 380    /* On Gen6, in the worst case, we use up one binding table entry per
 381     * transform feedback component (see comments above the definition of
 382     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
 383     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
 384     * BRW_MAX_SOL_BINDINGS.
 385     *
 386     * In "separate components" mode, we need to divide this value by
 387     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
 388     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
 389     */
 390    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
 391    ctx->Const.MaxTransformFeedbackSeparateComponents =
 392       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 393
 394    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
 395
 396    const int max_samples = brw_supported_msaa_modes(brw)[0];
 397    ctx->Const.MaxSamples = max_samples;
 398    ctx->Const.MaxColorTextureSamples = max_samples;
 399    ctx->Const.MaxDepthTextureSamples = max_samples;
 400    ctx->Const.MaxIntegerSamples = max_samples;
 401
 402    if (brw->gen >= 7)
 403       ctx->Const.MaxProgramTextureGatherComponents = 4;
 404
 405    ctx->Const.MinLineWidth = 1.0;
 406    ctx->Const.MinLineWidthAA = 1.0;
 407    ctx->Const.MaxLineWidth = 5.0;
 408    ctx->Const.MaxLineWidthAA = 5.0;
 409    ctx->Const.LineWidthGranularity = 0.5;
 410
 411    ctx->Const.MinPointSize = 1.0;
 412    ctx->Const.MinPointSizeAA = 1.0;
 413    ctx->Const.MaxPointSize = 255.0;
 414    ctx->Const.MaxPointSizeAA = 255.0;
 415    ctx->Const.PointSizeGranularity = 1.0;
 416
 417    if (brw->gen >= 5 || brw->is_g4x)
 418       ctx->Const.MaxClipPlanes = 8;
 419
 420    ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024;
 421    ctx->Const.VertexProgram.MaxAluInstructions = 0;
 422    ctx->Const.VertexProgram.MaxTexInstructions = 0;
 423    ctx->Const.VertexProgram.MaxTexIndirections = 0;
 424    ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
 425    ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
 426    ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
 427    ctx->Const.VertexProgram.MaxNativeAttribs = 16;
 428    ctx->Const.VertexProgram.MaxNativeTemps = 256;
 429    ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
 430    ctx->Const.VertexProgram.MaxNativeParameters = 1024;
 431    ctx->Const.VertexProgram.MaxEnvParams =
 432       MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
 433            ctx->Const.VertexProgram.MaxEnvParams);
 434
 435    ctx->Const.FragmentProgram.MaxNativeInstructions = 1024;
 436    ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024;
 437    ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024;
 438    ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024;
 439    ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
 440    ctx->Const.FragmentProgram.MaxNativeTemps = 256;
 441    ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
 442    ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
 443    ctx->Const.FragmentProgram.MaxEnvParams =
 444       MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
 445            ctx->Const.FragmentProgram.MaxEnvParams);
 446
 447    /* Fragment shaders use real, 32-bit twos-complement integers for all
 448     * integer types.
 449     */
 450    ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
 451    ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
 452    ctx->Const.FragmentProgram.LowInt.Precision = 0;
 453    ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt;
 454    ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt;
 455
 456    if (brw->gen >= 7) {
 457       ctx->Const.FragmentProgram.MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 458       ctx->Const.VertexProgram.MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 459       ctx->Const.GeometryProgram.MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 460       ctx->Const.FragmentProgram.MaxAtomicBuffers = BRW_MAX_ABO;
 461       ctx->Const.VertexProgram.MaxAtomicBuffers = BRW_MAX_ABO;
 462       ctx->Const.GeometryProgram.MaxAtomicBuffers = BRW_MAX_ABO;
 463       ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
 464    }
 465
 466    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
 467     * but we're not sure how it's actually done for vertex order,
 468     * that affect provoking vertex decision. Always use last vertex
 469     * convention for quad primitive which works as expected for now.
 470     */
 471    if (brw->gen >= 6)
 472       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 473
 474    ctx->Const.NativeIntegers = true;
 475    ctx->Const.UniformBooleanTrue = 1;
 476
 477    /* From the gen4 PRM, volume 4 page 127:
 478     *
 479     *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
 480     *      the base address of the first element of the surface, computed in
 481     *      software by adding the surface base address to the byte offset of
 482     *      the element in the buffer."
 483     *
 484     * However, unaligned accesses are slower, so enforce buffer alignment.
 485     */
 486    ctx->Const.UniformBufferOffsetAlignment = 16;
 487    ctx->Const.TextureBufferOffsetAlignment = 16;
 488
 489    if (brw->gen >= 6) {
 490       ctx->Const.MaxVarying = 32;
 491       ctx->Const.VertexProgram.MaxOutputComponents = 128;
 492       ctx->Const.GeometryProgram.MaxInputComponents = 64;
 493       ctx->Const.GeometryProgram.MaxOutputComponents = 128;
 494       ctx->Const.FragmentProgram.MaxInputComponents = 128;
 495    }
 496
 497    /* We want the GLSL compiler to emit code that uses condition codes */
 498    for (int i = 0; i < MESA_SHADER_TYPES; i++) {
 499       ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
 500       ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
 501       ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
 502       ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
 503       ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
 504       ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
 505
 506       ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
 507          (i == MESA_SHADER_FRAGMENT);
 508       ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
 509          (i == MESA_SHADER_FRAGMENT);
 510       ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
 511    }
 512
 513    ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
 514 }
 515
 516 /**
 517  * Process driconf (drirc) options, setting appropriate context flags.
 518  *
 519  * intelInitExtensions still pokes at optionCache directly, in order to
 520  * avoid advertising various extensions.  No flags are set, so it makes
 521  * sense to continue doing that there.
 522  */
 523 static void
 524 brw_process_driconf_options(struct brw_context *brw)
 525 {
 526    struct gl_context *ctx = &brw->ctx;
 527
 528    driOptionCache *options = &brw->optionCache;
 529    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 530                        brw->driContext->driScreenPriv->myNum, "i965");
 531
 532    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 533    switch (bo_reuse_mode) {
 534    case DRI_CONF_BO_REUSE_DISABLED:
 535       break;
 536    case DRI_CONF_BO_REUSE_ALL:
 537       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 538       break;
 539    }
 540
 541    if (!driQueryOptionb(options, "hiz")) {
 542        brw->has_hiz = false;
 543        /* On gen6, you can only do separate stencil with HIZ. */
 544        if (brw->gen == 6)
 545           brw->has_separate_stencil = false;
 546    }
 547
 548    if (driQueryOptionb(options, "always_flush_batch")) {
 549       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 550       brw->always_flush_batch = true;
 551    }
 552
 553    if (driQueryOptionb(options, "always_flush_cache")) {
 554       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 555       brw->always_flush_cache = true;
 556    }
 557
 558    if (driQueryOptionb(options, "disable_throttling")) {
 559       fprintf(stderr, "disabling flush throttling\n");
 560       brw->disable_throttling = true;
 561    }
 562
 563    brw->disable_derivative_optimization =
 564       driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
 565
 566    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 567
 568    ctx->Const.ForceGLSLExtensionsWarn =
 569       driQueryOptionb(options, "force_glsl_extensions_warn");
 570
 571    ctx->Const.DisableGLSLLineContinuations =
 572       driQueryOptionb(options, "disable_glsl_line_continuations");
 573 }
 574
 575 GLboolean
 576 brwCreateContext(gl_api api,
 577                  const struct gl_config *mesaVis,
 578                  __DRIcontext *driContextPriv,
 579                  unsigned major_version,
 580                  unsigned minor_version,
 581                  uint32_t flags,
 582                  bool notify_reset,
 583                  unsigned *dri_ctx_error,
 584                  void *sharedContextPrivate)
 585 {
 586    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 587    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 588    struct intel_screen *screen = sPriv->driverPrivate;
 589    const struct brw_device_info *devinfo = screen->devinfo;
 590    struct dd_function_table functions;
 591    struct gl_config visual;
 592
 593    if (flags & ~(__DRI_CTX_FLAG_DEBUG
 594                  | __DRI_CTX_FLAG_FORWARD_COMPATIBLE
 595                  | __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS)) {
 596       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 597       return false;
 598    }
 599
 600    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 601    if (!brw) {
 602       printf("%s: failed to alloc context\n", __FUNCTION__);
 603       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 604       return false;
 605    }
 606
 607    driContextPriv->driverPrivate = brw;
 608    brw->driContext = driContextPriv;
 609    brw->intelScreen = screen;
 610    brw->bufmgr = screen->bufmgr;
 611
 612    brw->gen = devinfo->gen;
 613    brw->gt = devinfo->gt;
 614    brw->is_g4x = devinfo->is_g4x;
 615    brw->is_baytrail = devinfo->is_baytrail;
 616    brw->is_haswell = devinfo->is_haswell;
 617    brw->has_llc = devinfo->has_llc;
 618    brw->has_hiz = devinfo->has_hiz_and_separate_stencil && brw->gen < 8;
 619    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 620    brw->has_pln = devinfo->has_pln;
 621    brw->has_compr4 = devinfo->has_compr4;
 622    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 623    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 624    brw->needs_unlit_centroid_workaround =
 625       devinfo->needs_unlit_centroid_workaround;
 626
 627    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 628    brw->has_swizzling = screen->hw_has_swizzling;
 629
 630    if (brw->gen >= 7) {
 631       gen7_init_vtable_surface_functions(brw);
 632       gen7_init_vtable_sampler_functions(brw);
 633       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 634    } else {
 635       gen4_init_vtable_surface_functions(brw);
 636       gen4_init_vtable_sampler_functions(brw);
 637       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 638    }
 639
 640    brw_init_driver_functions(brw, &functions);
 641
 642    if (notify_reset)
 643       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 644
 645    struct gl_context *ctx = &brw->ctx;
 646
 647    if (mesaVis == NULL) {
 648       memset(&visual, 0, sizeof visual);
 649       mesaVis = &visual;
 650    }
 651
 652    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 653       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 654       printf("%s: failed to init mesa context\n", __FUNCTION__);
 655       intelDestroyContext(driContextPriv);
 656       return false;
 657    }
 658
 659    /* Initialize the software rasterizer and helper modules.
 660     *
 661     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 662     * software fallbacks (which we have to support on legacy GL to do weird
 663     * glDrawPixels(), glBitmap(), and other functions).
 664     */
 665    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 666       _swrast_CreateContext(ctx);
 667    }
 668
 669    _vbo_CreateContext(ctx);
 670    if (ctx->swrast_context) {
 671       _tnl_CreateContext(ctx);
 672       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 673       _swsetup_CreateContext(ctx);
 674
 675       /* Configure swrast to match hardware characteristics: */
 676       _swrast_allow_pixel_fog(ctx, false);
 677       _swrast_allow_vertex_fog(ctx, true);
 678    }
 679
 680    _mesa_meta_init(ctx);
 681
 682    brw_process_driconf_options(brw);
 683    brw_process_intel_debug_variable(brw);
 684    brw_initialize_context_constants(brw);
 685
 686    ctx->Const.ResetStrategy = notify_reset
 687       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 688
 689    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 690    _mesa_init_point(ctx);
 691
 692    intel_batchbuffer_init(brw);
 693
 694    brw_init_state(brw);
 695
 696    intelInitExtensions(ctx);
 697
 698    intel_fbo_init(brw);
 699
 700    if (brw->gen >= 6) {
 701       /* Create a new hardware context.  Using a hardware context means that
 702        * our GPU state will be saved/restored on context switch, allowing us
 703        * to assume that the GPU is in the same state we left it in.
 704        *
 705        * This is required for transform feedback buffer offsets, query objects,
 706        * and also allows us to reduce how much state we have to emit.
 707        */
 708       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 709
 710       if (!brw->hw_ctx) {
 711          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 712          intelDestroyContext(driContextPriv);
 713          return false;
 714       }
 715    }
 716
 717    /* Notification of GPU resets requires hardware contexts and a kernel new
 718     * enough to support DRM_IOCTL_I915_GET_RESET_STATS, which isn't upstream
 719     * yet.
 720     */
 721    if (notify_reset) {
 722       /* This is the wrong error code, but the correct error code (one that
 723        * will cause EGL to generate EGL_BAD_MATCH) doesn't seem to exist.
 724        */
 725       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
 726       intelDestroyContext(driContextPriv);
 727       return false;
 728    }
 729
 730    brw_init_surface_formats(brw);
 731
 732    if (brw->is_g4x || brw->gen >= 5) {
 733       brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
 734       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
 735   } else {
 736       brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
 737       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
 738    }
 739
 740    brw->max_vs_threads = devinfo->max_vs_threads;
 741    brw->max_gs_threads = devinfo->max_gs_threads;
 742    brw->max_wm_threads = devinfo->max_wm_threads;
 743    brw->urb.size = devinfo->urb.size;
 744    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 745    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 746    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 747
 748    /* Estimate the size of the mappable aperture into the GTT.  There's an
 749     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 750     * It turns out it's basically always 256MB, though some ancient hardware
 751     * was smaller.
 752     */
 753    uint32_t gtt_size = 256 * 1024 * 1024;
 754
 755    /* We don't want to map two objects such that a memcpy between them would
 756     * just fault one mapping in and then the other over and over forever.  So
 757     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 758     * taken up by things like the framebuffer and the ringbuffer and such, so
 759     * be more conservative.
 760     */
 761    brw->max_gtt_map_object_size = gtt_size / 4;
 762
 763    if (brw->gen == 6)
 764       brw->urb.gen6_gs_previously_active = false;
 765
 766    brw->prim_restart.in_progress = false;
 767    brw->prim_restart.enable_cut_index = false;
 768    brw->gs.enabled = false;
 769
 770    if (brw->gen < 6) {
 771       brw->curbe.last_buf = calloc(1, 4096);
 772       brw->curbe.next_buf = calloc(1, 4096);
 773    }
 774
 775    ctx->VertexProgram._MaintainTnlProgram = true;
 776    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 777
 778    brw_draw_init( brw );
 779
 780    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 781       /* Turn on some extra GL_ARB_debug_output generation. */
 782       brw->perf_debug = true;
 783    }
 784
 785    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 786       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 787
 788    brw_fs_alloc_reg_sets(brw);
 789    brw_vec4_alloc_reg_set(brw);
 790
 791    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 792       brw_init_shader_time(brw);
 793
 794    _mesa_compute_version(ctx);
 795
 796    /* Here we override context constants. We apply the overrides after
 797     * calculation of the context version because we do not want the overridden
 798     * constants to change the version.
 799     */
 800    brw_override_max_samples(brw);
 801
 802    _mesa_initialize_dispatch_tables(ctx);
 803    _mesa_initialize_vbo_vtxfmt(ctx);
 804
 805    if (ctx->Extensions.AMD_performance_monitor) {
 806       brw_init_performance_monitors(brw);
 807    }
 808
 809    return true;
 810 }
 811
 812 void
 813 intelDestroyContext(__DRIcontext * driContextPriv)
 814 {
 815    struct brw_context *brw =
 816       (struct brw_context *) driContextPriv->driverPrivate;
 817    struct gl_context *ctx = &brw->ctx;
 818
 819    assert(brw); /* should never be null */
 820    if (!brw)
 821       return;
 822
 823    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 824    if (INTEL_DEBUG & DEBUG_AUB) {
 825       intel_batchbuffer_flush(brw);
 826       aub_dump_bmp(&brw->ctx);
 827    }
 828
 829    _mesa_meta_free(&brw->ctx);
 830
 831    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 832       /* Force a report. */
 833       brw->shader_time.report_time = 0;
 834
 835       brw_collect_and_report_shader_time(brw);
 836       brw_destroy_shader_time(brw);
 837    }
 838
 839    brw_destroy_state(brw);
 840    brw_draw_destroy(brw);
 841
 842    drm_intel_bo_unreference(brw->curbe.curbe_bo);
 843    drm_intel_bo_unreference(brw->vs.base.const_bo);
 844    drm_intel_bo_unreference(brw->wm.base.const_bo);
 845
 846    free(brw->curbe.last_buf);
 847    free(brw->curbe.next_buf);
 848
 849    drm_intel_gem_context_destroy(brw->hw_ctx);
 850
 851    if (ctx->swrast_context) {
 852       _swsetup_DestroyContext(&brw->ctx);
 853       _tnl_DestroyContext(&brw->ctx);
 854    }
 855    _vbo_DestroyContext(&brw->ctx);
 856
 857    if (ctx->swrast_context)
 858       _swrast_DestroyContext(&brw->ctx);
 859
 860    intel_batchbuffer_free(brw);
 861
 862    drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
 863    brw->first_post_swapbuffers_batch = NULL;
 864
 865    driDestroyOptionCache(&brw->optionCache);
 866
 867    /* free the Mesa context */
 868    _mesa_free_context_data(&brw->ctx);
 869
 870    ralloc_free(brw);
 871    driContextPriv->driverPrivate = NULL;
 872 }
 873
 874 GLboolean
 875 intelUnbindContext(__DRIcontext * driContextPriv)
 876 {
 877    /* Unset current context and dispath table */
 878    _mesa_make_current(NULL, NULL, NULL);
 879
 880    return true;
 881 }
 882
 883 /**
 884  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 885  * on window system framebuffers.
 886  *
 887  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 888  * your renderbuffer can do sRGB encode, and you can flip a switch that does
 889  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 890  * for a visual where you're guaranteed to be capable, but it turns out that
 891  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 893  * used.  Applications thus get built that accidentally rely on the default
 894  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 895  * great...
 896  *
 897  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 898  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 899  * So they removed the enable knob and made it "if the renderbuffer is sRGB
 900  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 901  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 902  * and get no sRGB encode (assuming that both kinds of visual are available).
 903  * Thus our choice to support sRGB by default on our visuals for desktop would
 904  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 905  *
 906  * Unfortunately, renderbuffer setup happens before a context is created.  So
 907  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 908  * context (without an sRGB visual, though we don't have sRGB visuals exposed
 909  * yet), we go turn that back off before anyone finds out.
 910  */
 911 static void
 912 intel_gles3_srgb_workaround(struct brw_context *brw,
 913                             struct gl_framebuffer *fb)
 914 {
 915    struct gl_context *ctx = &brw->ctx;
 916
 917    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
 918       return;
 919
 920    /* Some day when we support the sRGB capable bit on visuals available for
 921     * GLES, we'll need to respect that and not disable things here.
 922     */
 923    fb->Visual.sRGBCapable = false;
 924    for (int i = 0; i < BUFFER_COUNT; i++) {
 925       if (fb->Attachment[i].Renderbuffer &&
 926           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_SARGB8) {
 927          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_ARGB8888;
 928       }
 929    }
 930 }
 931
 932 GLboolean
 933 intelMakeCurrent(__DRIcontext * driContextPriv,
 934                  __DRIdrawable * driDrawPriv,
 935                  __DRIdrawable * driReadPriv)
 936 {
 937    struct brw_context *brw;
 938    GET_CURRENT_CONTEXT(curCtx);
 939
 940    if (driContextPriv)
 941       brw = (struct brw_context *) driContextPriv->driverPrivate;
 942    else
 943       brw = NULL;
 944
 945    /* According to the glXMakeCurrent() man page: "Pending commands to
 946     * the previous context, if any, are flushed before it is released."
 947     * But only flush if we're actually changing contexts.
 948     */
 949    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
 950       _mesa_flush(curCtx);
 951    }
 952
 953    if (driContextPriv) {
 954       struct gl_context *ctx = &brw->ctx;
 955       struct gl_framebuffer *fb, *readFb;
 956
 957       if (driDrawPriv == NULL && driReadPriv == NULL) {
 958          fb = _mesa_get_incomplete_framebuffer();
 959          readFb = _mesa_get_incomplete_framebuffer();
 960       } else {
 961          fb = driDrawPriv->driverPrivate;
 962          readFb = driReadPriv->driverPrivate;
 963          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
 964          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
 965       }
 966
 967       /* The sRGB workaround changes the renderbuffer's format. We must change
 968        * the format before the renderbuffer's miptree get's allocated, otherwise
 969        * the formats of the renderbuffer and its miptree will differ.
 970        */
 971       intel_gles3_srgb_workaround(brw, fb);
 972       intel_gles3_srgb_workaround(brw, readFb);
 973
 974       intel_prepare_render(brw);
 975       _mesa_make_current(ctx, fb, readFb);
 976    } else {
 977       _mesa_make_current(NULL, NULL, NULL);
 978    }
 979
 980    return true;
 981 }
 982
 983 void
 984 intel_resolve_for_dri2_flush(struct brw_context *brw,
 985                              __DRIdrawable *drawable)
 986 {
 987    if (brw->gen < 6) {
 988       /* MSAA and fast color clear are not supported, so don't waste time
 989        * checking whether a resolve is needed.
 990        */
 991       return;
 992    }
 993
 994    struct gl_framebuffer *fb = drawable->driverPrivate;
 995    struct intel_renderbuffer *rb;
 996
 997    /* Usually, only the back buffer will need to be downsampled. However,
 998     * the front buffer will also need it if the user has rendered into it.
 999     */
1000    static const gl_buffer_index buffers[2] = {
1001          BUFFER_BACK_LEFT,
1002          BUFFER_FRONT_LEFT,
1003    };
1004
1005    for (int i = 0; i < 2; ++i) {
1006       rb = intel_get_renderbuffer(fb, buffers[i]);
1007       if (rb == NULL || rb->mt == NULL)
1008          continue;
1009       if (rb->mt->num_samples <= 1)
1010          intel_miptree_resolve_color(brw, rb->mt);
1011       else
1012          intel_miptree_downsample(brw, rb->mt);
1013    }
1014 }
1015
/**
 * Size in bits of one pixel of the renderbuffer's format (bytes times 8).
 */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   const int bytes_per_pixel = _mesa_get_format_bytes(intel_rb_format(rb));

   return bytes_per_pixel * 8;
}
1021
1022 static void
1023 intel_query_dri2_buffers(struct brw_context *brw,
1024                          __DRIdrawable *drawable,
1025                          __DRIbuffer **buffers,
1026                          int *count);
1027
1028 static void
1029 intel_process_dri2_buffer(struct brw_context *brw,
1030                           __DRIdrawable *drawable,
1031                           __DRIbuffer *buffer,
1032                           struct intel_renderbuffer *rb,
1033                           const char *buffer_name);
1034
1035 static void
1036 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1037
1038 static void
1039 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1040 {
1041    struct gl_framebuffer *fb = drawable->driverPrivate;
1042    struct intel_renderbuffer *rb;
1043    __DRIbuffer *buffers = NULL;
1044    int i, count;
1045    const char *region_name;
1046
1047    /* Set this up front, so that in case our buffers get invalidated
1048     * while we're getting new buffers, we don't clobber the stamp and
1049     * thus ignore the invalidate. */
1050    drawable->lastStamp = drawable->dri2.stamp;
1051
1052    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1053       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1054
1055    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1056
1057    if (buffers == NULL)
1058       return;
1059
1060    for (i = 0; i < count; i++) {
1061        switch (buffers[i].attachment) {
1062        case __DRI_BUFFER_FRONT_LEFT:
1063            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1064            region_name = "dri2 front buffer";
1065            break;
1066
1067        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1068            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1069            region_name = "dri2 fake front buffer";
1070            break;
1071
1072        case __DRI_BUFFER_BACK_LEFT:
1073            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1074            region_name = "dri2 back buffer";
1075            break;
1076
1077        case __DRI_BUFFER_DEPTH:
1078        case __DRI_BUFFER_HIZ:
1079        case __DRI_BUFFER_DEPTH_STENCIL:
1080        case __DRI_BUFFER_STENCIL:
1081        case __DRI_BUFFER_ACCUM:
1082        default:
1083            fprintf(stderr,
1084                    "unhandled buffer attach event, attachment type %d\n",
1085                    buffers[i].attachment);
1086            return;
1087        }
1088
1089        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1090    }
1091
1092 }
1093
1094 void
1095 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1096 {
1097    struct brw_context *brw = context->driverPrivate;
1098    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1099
1100    /* Set this up front, so that in case our buffers get invalidated
1101     * while we're getting new buffers, we don't clobber the stamp and
1102     * thus ignore the invalidate. */
1103    drawable->lastStamp = drawable->dri2.stamp;
1104
1105    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1106       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1107
1108    if (screen->image.loader)
1109       intel_update_image_buffers(brw, drawable);
1110    else
1111       intel_update_dri2_buffers(brw, drawable);
1112
1113    driUpdateFramebufferSize(&brw->ctx, drawable);
1114 }
1115
1116 /**
1117  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1118  * state is required.
1119  */
1120 void
1121 intel_prepare_render(struct brw_context *brw)
1122 {
1123    __DRIcontext *driContext = brw->driContext;
1124    __DRIdrawable *drawable;
1125
1126    drawable = driContext->driDrawablePriv;
1127    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1128       if (drawable->lastStamp != drawable->dri2.stamp)
1129          intel_update_renderbuffers(driContext, drawable);
1130       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1131    }
1132
1133    drawable = driContext->driReadablePriv;
1134    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1135       if (drawable->lastStamp != drawable->dri2.stamp)
1136          intel_update_renderbuffers(driContext, drawable);
1137       driContext->dri2.read_stamp = drawable->dri2.stamp;
1138    }
1139
1140    /* If we're currently rendering to the front buffer, the rendering
1141     * that will happen next will probably dirty the front buffer.  So
1142     * mark it as dirty here.
1143     */
1144    if (brw->is_front_buffer_rendering)
1145       brw->front_buffer_dirty = true;
1146
1147    /* Wait for the swapbuffers before the one we just emitted, so we
1148     * don't get too many swaps outstanding for apps that are GPU-heavy
1149     * but not CPU-heavy.
1150     *
1151     * We're using intelDRI2Flush (called from the loader before
1152     * swapbuffer) and glFlush (for front buffer rendering) as the
1153     * indicator that a frame is done and then throttle when we get
1154     * here as we prepare to render the next frame.  At this point for
1155     * round trips for swap/copy and getting new buffers are done and
1156     * we'll spend less time waiting on the GPU.
1157     *
1158     * Unfortunately, we don't have a handle to the batch containing
1159     * the swap, and getting our hands on that doesn't seem worth it,
1160     * so we just us the first batch we emitted after the last swap.
1161     */
1162    if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
1163       if (!brw->disable_throttling)
1164          drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
1165       drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
1166       brw->first_post_swapbuffers_batch = NULL;
1167       brw->need_throttle = false;
1168    }
1169 }
1170
1171 /**
1172  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1173  *
1174  * To determine which DRI buffers to request, examine the renderbuffers
1175  * attached to the drawable's framebuffer. Then request the buffers with
1176  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1177  *
1178  * This is called from intel_update_renderbuffers().
1179  *
1180  * \param drawable      Drawable whose buffers are queried.
1181  * \param buffers       [out] List of buffers returned by DRI2 query.
1182  * \param buffer_count  [out] Number of buffers returned.
1183  *
1184  * \see intel_update_renderbuffers()
1185  * \see DRI2GetBuffers()
1186  * \see DRI2GetBuffersWithFormat()
1187  */
1188 static void
1189 intel_query_dri2_buffers(struct brw_context *brw,
1190                          __DRIdrawable *drawable,
1191                          __DRIbuffer **buffers,
1192                          int *buffer_count)
1193 {
1194    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1195    struct gl_framebuffer *fb = drawable->driverPrivate;
1196    int i = 0;
1197    unsigned attachments[8];
1198
1199    struct intel_renderbuffer *front_rb;
1200    struct intel_renderbuffer *back_rb;
1201
1202    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1203    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1204
1205    memset(attachments, 0, sizeof(attachments));
1206    if ((brw->is_front_buffer_rendering ||
1207         brw->is_front_buffer_reading ||
1208         !back_rb) && front_rb) {
1209       /* If a fake front buffer is in use, then querying for
1210        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1211        * the real front buffer to the fake front buffer.  So before doing the
1212        * query, we need to make sure all the pending drawing has landed in the
1213        * real front buffer.
1214        */
1215       intel_batchbuffer_flush(brw);
1216       intel_flush_front(&brw->ctx);
1217
1218       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1219       attachments[i++] = intel_bits_per_pixel(front_rb);
1220    } else if (front_rb && brw->front_buffer_dirty) {
1221       /* We have pending front buffer rendering, but we aren't querying for a
1222        * front buffer.  If the front buffer we have is a fake front buffer,
1223        * the X server is going to throw it away when it processes the query.
1224        * So before doing the query, make sure all the pending drawing has
1225        * landed in the real front buffer.
1226        */
1227       intel_batchbuffer_flush(brw);
1228       intel_flush_front(&brw->ctx);
1229    }
1230
1231    if (back_rb) {
1232       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1233       attachments[i++] = intel_bits_per_pixel(back_rb);
1234    }
1235
1236    assert(i <= ARRAY_SIZE(attachments));
1237
1238    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1239                                                         &drawable->w,
1240                                                         &drawable->h,
1241                                                         attachments, i / 2,
1242                                                         buffer_count,
1243                                                         drawable->loaderPrivate);
1244 }
1245
1246 /**
1247  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1248  *
1249  * This is called from intel_update_renderbuffers().
1250  *
1251  * \par Note:
1252  *    DRI buffers whose attachment point is DRI2BufferStencil or
1253  *    DRI2BufferDepthStencil are handled as special cases.
1254  *
1255  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1256  *        that is passed to intel_region_alloc_for_handle().
1257  *
1258  * \see intel_update_renderbuffers()
1259  * \see intel_region_alloc_for_handle()
1260  */
1261 static void
1262 intel_process_dri2_buffer(struct brw_context *brw,
1263                           __DRIdrawable *drawable,
1264                           __DRIbuffer *buffer,
1265                           struct intel_renderbuffer *rb,
1266                           const char *buffer_name)
1267 {
1268    struct intel_region *region = NULL;
1269
1270    if (!rb)
1271       return;
1272
1273    unsigned num_samples = rb->Base.Base.NumSamples;
1274
1275    /* We try to avoid closing and reopening the same BO name, because the first
1276     * use of a mapping of the buffer involves a bunch of page faulting which is
1277     * moderately expensive.
1278     */
1279    if (num_samples == 0) {
1280        if (rb->mt &&
1281            rb->mt->region &&
1282            rb->mt->region->name == buffer->name)
1283           return;
1284    } else {
1285        if (rb->mt &&
1286            rb->mt->singlesample_mt &&
1287            rb->mt->singlesample_mt->region &&
1288            rb->mt->singlesample_mt->region->name == buffer->name)
1289           return;
1290    }
1291
1292    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1293       fprintf(stderr,
1294               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1295               buffer->name, buffer->attachment,
1296               buffer->cpp, buffer->pitch);
1297    }
1298
1299    intel_miptree_release(&rb->mt);
1300    region = intel_region_alloc_for_handle(brw->intelScreen,
1301                                           buffer->cpp,
1302                                           drawable->w,
1303                                           drawable->h,
1304                                           buffer->pitch,
1305                                           buffer->name,
1306                                           buffer_name);
1307    if (!region)
1308       return;
1309
1310    rb->mt = intel_miptree_create_for_dri2_buffer(brw,
1311                                                  buffer->attachment,
1312                                                  intel_rb_format(rb),
1313                                                  num_samples,
1314                                                  region);
1315    intel_region_release(&region);
1316 }
1317
1318 /**
1319  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1320  *
1321  * To determine which DRI buffers to request, examine the renderbuffers
1322  * attached to the drawable's framebuffer. Then request the buffers from
1323  * the image loader
1324  *
1325  * This is called from intel_update_renderbuffers().
1326  *
1327  * \param drawable      Drawable whose buffers are queried.
1328  * \param buffers       [out] List of buffers returned by DRI2 query.
1329  * \param buffer_count  [out] Number of buffers returned.
1330  *
1331  * \see intel_update_renderbuffers()
1332  */
1333
1334 static void
1335 intel_update_image_buffer(struct brw_context *intel,
1336                           __DRIdrawable *drawable,
1337                           struct intel_renderbuffer *rb,
1338                           __DRIimage *buffer,
1339                           enum __DRIimageBufferMask buffer_type)
1340 {
1341    struct intel_region *region = buffer->region;
1342
1343    if (!rb || !region)
1344       return;
1345
1346    unsigned num_samples = rb->Base.Base.NumSamples;
1347
1348    if (rb->mt &&
1349        rb->mt->region &&
1350        rb->mt->region == region)
1351       return;
1352
1353    intel_miptree_release(&rb->mt);
1354    rb->mt = intel_miptree_create_for_image_buffer(intel,
1355                                                   buffer_type,
1356                                                   intel_rb_format(rb),
1357                                                   num_samples,
1358                                                   region);
1359 }
1360
1361 static void
1362 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1363 {
1364    struct gl_framebuffer *fb = drawable->driverPrivate;
1365    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1366    struct intel_renderbuffer *front_rb;
1367    struct intel_renderbuffer *back_rb;
1368    struct __DRIimageList images;
1369    unsigned int format;
1370    uint32_t buffer_mask = 0;
1371
1372    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1373    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1374
1375    if (back_rb)
1376       format = intel_rb_format(back_rb);
1377    else if (front_rb)
1378       format = intel_rb_format(front_rb);
1379    else
1380       return;
1381
1382    if ((brw->is_front_buffer_rendering || brw->is_front_buffer_reading || !back_rb) && front_rb)
1383       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1384
1385    if (back_rb)
1386       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1387
1388    (*screen->image.loader->getBuffers) (drawable,
1389                                         driGLFormatToImageFormat(format),
1390                                         &drawable->dri2.stamp,
1391                                         drawable->loaderPrivate,
1392                                         buffer_mask,
1393                                         &images);
1394
1395    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1396       drawable->w = images.front->width;
1397       drawable->h = images.front->height;
1398       intel_update_image_buffer(brw,
1399                                 drawable,
1400                                 front_rb,
1401                                 images.front,
1402                                 __DRI_IMAGE_BUFFER_FRONT);
1403    }
1404    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1405       drawable->w = images.back->width;
1406       drawable->h = images.back->height;
1407       intel_update_image_buffer(brw,
1408                                 drawable,
1409                                 back_rb,
1410                                 images.back,
1411                                 __DRI_IMAGE_BUFFER_BACK);
1412    }
1413 }