src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43
  44 #include "vbo/vbo_context.h"
  45
  46 #include "drivers/common/driverfuncs.h"
  47 #include "drivers/common/meta.h"
  48 #include "utils.h"
  49
  50 #include "brw_context.h"
  51 #include "brw_defines.h"
  52 #include "brw_draw.h"
  53 #include "brw_state.h"
  54
  55 #include "intel_batchbuffer.h"
  56 #include "intel_buffer_objects.h"
  57 #include "intel_buffers.h"
  58 #include "intel_fbo.h"
  59 #include "intel_mipmap_tree.h"
  60 #include "intel_pixel.h"
  61 #include "intel_image.h"
  62 #include "intel_tex.h"
  63 #include "intel_tex_obj.h"
  64
  65 #include "swrast_setup/swrast_setup.h"
  66 #include "tnl/tnl.h"
  67 #include "tnl/t_pipeline.h"
  68 #include "glsl/ralloc.h"
  69
  70 /***************************************
  71  * Mesa's Driver Functions
  72  ***************************************/
  73
  74 static size_t
  75 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  76                              GLenum internalFormat, int samples[16])
  77 {
  78    struct brw_context *brw = brw_context(ctx);
  79
  80    (void) target;
  81
  82    switch (brw->gen) {
  83    case 8:
  84       samples[0] = 8;
  85       samples[1] = 4;
  86       samples[2] = 2;
  87       return 3;
  88
  89    case 7:
  90       samples[0] = 8;
  91       samples[1] = 4;
  92       return 2;
  93
  94    case 6:
  95       samples[0] = 4;
  96       return 1;
  97
  98    default:
  99       samples[0] = 1;
 100       return 1;
 101    }
 102 }
 103
 104 const char *const brw_vendor_string = "Intel Open Source Technology Center";
 105
 106 const char *
 107 brw_get_renderer_string(unsigned deviceID)
 108 {
 109    const char *chipset;
 110    static char buffer[128];
 111
 112    switch (deviceID) {
 113 #undef CHIPSET
 114 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
 115 #include "pci_ids/i965_pci_ids.h"
 116    default:
 117       chipset = "Unknown Intel Chipset";
 118       break;
 119    }
 120
 121    (void) driGetRendererString(buffer, chipset, 0);
 122    return buffer;
 123 }
 124
 125 static const GLubyte *
 126 intelGetString(struct gl_context * ctx, GLenum name)
 127 {
 128    const struct brw_context *const brw = brw_context(ctx);
 129
 130    switch (name) {
 131    case GL_VENDOR:
 132       return (GLubyte *) brw_vendor_string;
 133
 134    case GL_RENDERER:
 135       return
 136          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 137
 138    default:
 139       return NULL;
 140    }
 141 }
 142
 143 static void
 144 intel_viewport(struct gl_context *ctx)
 145 {
 146    struct brw_context *brw = brw_context(ctx);
 147    __DRIcontext *driContext = brw->driContext;
 148
 149    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 150       dri2InvalidateDrawable(driContext->driDrawablePriv);
 151       dri2InvalidateDrawable(driContext->driReadablePriv);
 152    }
 153 }
 154
 155 static void
 156 intelInvalidateState(struct gl_context * ctx, GLuint new_state)
 157 {
 158    struct brw_context *brw = brw_context(ctx);
 159
 160    if (ctx->swrast_context)
 161       _swrast_InvalidateState(ctx, new_state);
 162    _vbo_InvalidateState(ctx, new_state);
 163
 164    brw->NewGLState |= new_state;
 165 }
 166
 167 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 168
 169 static void
 170 intel_flush_front(struct gl_context *ctx)
 171 {
 172    struct brw_context *brw = brw_context(ctx);
 173    __DRIcontext *driContext = brw->driContext;
 174    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 175    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 176
 177    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 178       if (flushFront(screen) && driDrawable &&
 179           driDrawable->loaderPrivate) {
 180
 181          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 182           *
 183           * This potentially resolves both front and back buffer. It
 184           * is unnecessary to resolve the back, but harms nothing except
 185           * performance. And no one cares about front-buffer render
 186           * performance.
 187           */
 188          intel_resolve_for_dri2_flush(brw, driDrawable);
 189          intel_batchbuffer_flush(brw);
 190
 191          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 192
 193          /* We set the dirty bit in intel_prepare_render() if we're
 194           * front buffer rendering once we get there.
 195           */
 196          brw->front_buffer_dirty = false;
 197       }
 198    }
 199 }
 200
 201 static void
 202 intel_glFlush(struct gl_context *ctx)
 203 {
 204    struct brw_context *brw = brw_context(ctx);
 205
 206    intel_batchbuffer_flush(brw);
 207    intel_flush_front(ctx);
 208    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
 209       brw->need_throttle = true;
 210 }
 211
 212 void
 213 intelFinish(struct gl_context * ctx)
 214 {
 215    struct brw_context *brw = brw_context(ctx);
 216
 217    intel_glFlush(ctx);
 218
 219    if (brw->batch.last_bo)
 220       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 221 }
 222
 223 static void
 224 brw_init_driver_functions(struct brw_context *brw,
 225                           struct dd_function_table *functions)
 226 {
 227    _mesa_init_driver_functions(functions);
 228
 229    /* GLX uses DRI2 invalidate events to handle window resizing.
 230     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 231     * which doesn't provide a mechanism for snooping the event queues.
 232     *
 233     * So EGL still relies on viewport hacks to handle window resizing.
 234     * This should go away with DRI3000.
 235     */
 236    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 237       functions->Viewport = intel_viewport;
 238
 239    functions->Flush = intel_glFlush;
 240    functions->Finish = intelFinish;
 241    functions->GetString = intelGetString;
 242    functions->UpdateState = intelInvalidateState;
 243
 244    intelInitTextureFuncs(functions);
 245    intelInitTextureImageFuncs(functions);
 246    intelInitTextureSubImageFuncs(functions);
 247    intelInitTextureCopyImageFuncs(functions);
 248    intelInitClearFuncs(functions);
 249    intelInitBufferFuncs(functions);
 250    intelInitPixelFuncs(functions);
 251    intelInitBufferObjectFuncs(functions);
 252    intel_init_syncobj_functions(functions);
 253    brw_init_object_purgeable_functions(functions);
 254
 255    brwInitFragProgFuncs( functions );
 256    brw_init_common_queryobj_functions(functions);
 257    if (brw->gen >= 6)
 258       gen6_init_queryobj_functions(functions);
 259    else
 260       gen4_init_queryobj_functions(functions);
 261
 262    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 263
 264    functions->NewTransformFeedback = brw_new_transform_feedback;
 265    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 266    functions->GetTransformFeedbackVertexCount =
 267       brw_get_transform_feedback_vertex_count;
 268    if (brw->gen >= 7) {
 269       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 270       functions->EndTransformFeedback = gen7_end_transform_feedback;
 271       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 272       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 273    } else {
 274       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 275       functions->EndTransformFeedback = brw_end_transform_feedback;
 276    }
 277
 278    if (brw->gen >= 6)
 279       functions->GetSamplePosition = gen6_get_sample_position;
 280 }
 281
/**
 * Set the implementation limits and shader compiler options in
 * brw->ctx (ctx->Const and ctx->ShaderCompilerOptions).
 *
 * Many values depend on the hardware generation (brw->gen) and on the
 * is_haswell / is_g4x flags.  The "clamp_max_samples" driconf option is
 * read from brw->optionCache to limit the advertised MSAA sample count.
 * Several limits are derived from values assigned earlier in this
 * function, so statement order matters.
 */
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* Sampler count per stage: BRW_MAX_TEX_UNIT on Gen8+ and Haswell,
    * 16 on everything else.
    */
   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
   /* Geometry shader samplers are only advertised on Gen7+. */
   if (brw->gen >= 7)
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
   else
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
   /* Compute limits are only raised when ARB_compute_shader has been
    * force-enabled through the extension override.
    */
   if (_mesa_extension_override_enables.ARB_compute_shader) {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
      ctx->Const.MaxUniformBufferBindings += 12;
   } else {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
   }
   /* Sum of the per-stage limits assigned above. */
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   /* Never exceed core Mesa's compile-time limit. */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxTextureMbytes = 1536;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   /* MSAA: the loop below treats msaa_modes as a 0-terminated list and
    * takes the first entry that fits, so it presumably is sorted from
    * largest to smallest -- TODO confirm in intel_supported_msaa_modes().
    */
   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      /* A negative option value means "no clamp": use the first mode. */
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;

   /* Left at its existing value on generations before Gen6. */
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   ctx->Const.MaxLineWidth = 5.0;
   ctx->Const.MaxLineWidthAA = 5.0;
   ctx->Const.LineWidthGranularity = 0.5;

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   /* Other hardware keeps whatever MaxClipPlanes was already set. */
   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   /* Clamp the existing MaxEnvParams to the native parameter limit. */
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   /* Clamp the existing MaxEnvParams to the native parameter limit. */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   /* Atomic counter limits are only set on Gen7+. */
   if (brw->gen >= 7) {
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
      /* NOTE(review): four stages get BRW_MAX_ABO above but the combined
       * limit uses a factor of 3 -- presumably compute is excluded on
       * purpose; confirm.
       */
      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
   }

   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
    * but we're not sure how it's actually done for vertex order,
    * that affect provoking vertex decision. Always use last vertex
    * convention for quad primitive which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.UniformBooleanTrue = 1;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;
   ctx->Const.TextureBufferOffsetAlignment = 16;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      /* If-nesting depth is limited to 16 before Gen6, unlimited after. */
      ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
      ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
      ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
      ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
      ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
      /* Only the fragment stage sets EmitNoIndirectOutput/Temp. */
      ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput =
         (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
         (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
      ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
   }

   ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
   ctx->ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;

   /* ARB_viewport_array */
   if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
}
 515
 516 /**
 517  * Process driconf (drirc) options, setting appropriate context flags.
 518  *
 519  * intelInitExtensions still pokes at optionCache directly, in order to
 520  * avoid advertising various extensions.  No flags are set, so it makes
 521  * sense to continue doing that there.
 522  */
 523 static void
 524 brw_process_driconf_options(struct brw_context *brw)
 525 {
 526    struct gl_context *ctx = &brw->ctx;
 527
 528    driOptionCache *options = &brw->optionCache;
 529    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 530                        brw->driContext->driScreenPriv->myNum, "i965");
 531
 532    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 533    switch (bo_reuse_mode) {
 534    case DRI_CONF_BO_REUSE_DISABLED:
 535       break;
 536    case DRI_CONF_BO_REUSE_ALL:
 537       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 538       break;
 539    }
 540
 541    if (!driQueryOptionb(options, "hiz")) {
 542        brw->has_hiz = false;
 543        /* On gen6, you can only do separate stencil with HIZ. */
 544        if (brw->gen == 6)
 545           brw->has_separate_stencil = false;
 546    }
 547
 548    if (driQueryOptionb(options, "always_flush_batch")) {
 549       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 550       brw->always_flush_batch = true;
 551    }
 552
 553    if (driQueryOptionb(options, "always_flush_cache")) {
 554       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 555       brw->always_flush_cache = true;
 556    }
 557
 558    if (driQueryOptionb(options, "disable_throttling")) {
 559       fprintf(stderr, "disabling flush throttling\n");
 560       brw->disable_throttling = true;
 561    }
 562
 563    brw->disable_derivative_optimization =
 564       driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
 565
 566    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 567
 568    ctx->Const.ForceGLSLExtensionsWarn =
 569       driQueryOptionb(options, "force_glsl_extensions_warn");
 570
 571    ctx->Const.DisableGLSLLineContinuations =
 572       driQueryOptionb(options, "disable_glsl_line_continuations");
 573 }
 574
 575 GLboolean
 576 brwCreateContext(gl_api api,
 577                  const struct gl_config *mesaVis,
 578                  __DRIcontext *driContextPriv,
 579                  unsigned major_version,
 580                  unsigned minor_version,
 581                  uint32_t flags,
 582                  bool notify_reset,
 583                  unsigned *dri_ctx_error,
 584                  void *sharedContextPrivate)
 585 {
 586    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 587    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 588    struct intel_screen *screen = sPriv->driverPrivate;
 589    const struct brw_device_info *devinfo = screen->devinfo;
 590    struct dd_function_table functions;
 591
 592    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 593     * provides us with context reset notifications.
 594     */
 595    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 596       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 597
 598    if (screen->has_context_reset_notification)
 599       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 600
 601    if (flags & ~allowed_flags) {
 602       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 603       return false;
 604    }
 605
 606    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 607    if (!brw) {
 608       fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
 609       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 610       return false;
 611    }
 612
 613    driContextPriv->driverPrivate = brw;
 614    brw->driContext = driContextPriv;
 615    brw->intelScreen = screen;
 616    brw->bufmgr = screen->bufmgr;
 617
 618    brw->gen = devinfo->gen;
 619    brw->gt = devinfo->gt;
 620    brw->is_g4x = devinfo->is_g4x;
 621    brw->is_baytrail = devinfo->is_baytrail;
 622    brw->is_haswell = devinfo->is_haswell;
 623    brw->is_cherryview = devinfo->is_cherryview;
 624    brw->has_llc = devinfo->has_llc;
 625    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 626    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 627    brw->has_pln = devinfo->has_pln;
 628    brw->has_compr4 = devinfo->has_compr4;
 629    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 630    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 631    brw->needs_unlit_centroid_workaround =
 632       devinfo->needs_unlit_centroid_workaround;
 633
 634    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 635    brw->has_swizzling = screen->hw_has_swizzling;
 636
 637    brw->vs.base.stage = MESA_SHADER_VERTEX;
 638    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 639    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 640    if (brw->gen >= 8) {
 641       gen8_init_vtable_surface_functions(brw);
 642       gen7_init_vtable_sampler_functions(brw);
 643       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 644    } else if (brw->gen >= 7) {
 645       gen7_init_vtable_surface_functions(brw);
 646       gen7_init_vtable_sampler_functions(brw);
 647       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 648    } else {
 649       gen4_init_vtable_surface_functions(brw);
 650       gen4_init_vtable_sampler_functions(brw);
 651       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 652    }
 653
 654    brw_init_driver_functions(brw, &functions);
 655
 656    if (notify_reset)
 657       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 658
 659    struct gl_context *ctx = &brw->ctx;
 660
 661    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 662       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 663       fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
 664       intelDestroyContext(driContextPriv);
 665       return false;
 666    }
 667
 668    driContextSetFlags(ctx, flags);
 669
 670    /* Initialize the software rasterizer and helper modules.
 671     *
 672     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 673     * software fallbacks (which we have to support on legacy GL to do weird
 674     * glDrawPixels(), glBitmap(), and other functions).
 675     */
 676    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 677       _swrast_CreateContext(ctx);
 678    }
 679
 680    _vbo_CreateContext(ctx);
 681    if (ctx->swrast_context) {
 682       _tnl_CreateContext(ctx);
 683       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 684       _swsetup_CreateContext(ctx);
 685
 686       /* Configure swrast to match hardware characteristics: */
 687       _swrast_allow_pixel_fog(ctx, false);
 688       _swrast_allow_vertex_fog(ctx, true);
 689    }
 690
 691    _mesa_meta_init(ctx);
 692
 693    brw_process_driconf_options(brw);
 694    brw_process_intel_debug_variable(brw);
 695    brw_initialize_context_constants(brw);
 696
 697    ctx->Const.ResetStrategy = notify_reset
 698       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 699
 700    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 701    _mesa_init_point(ctx);
 702
 703    intel_fbo_init(brw);
 704
 705    intel_batchbuffer_init(brw);
 706
 707    if (brw->gen >= 6) {
 708       /* Create a new hardware context.  Using a hardware context means that
 709        * our GPU state will be saved/restored on context switch, allowing us
 710        * to assume that the GPU is in the same state we left it in.
 711        *
 712        * This is required for transform feedback buffer offsets, query objects,
 713        * and also allows us to reduce how much state we have to emit.
 714        */
 715       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 716
 717       if (!brw->hw_ctx) {
 718          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 719          intelDestroyContext(driContextPriv);
 720          return false;
 721       }
 722    }
 723
 724    brw_init_state(brw);
 725
 726    intelInitExtensions(ctx);
 727
 728    brw_init_surface_formats(brw);
 729
 730    brw->max_vs_threads = devinfo->max_vs_threads;
 731    brw->max_gs_threads = devinfo->max_gs_threads;
 732    brw->max_wm_threads = devinfo->max_wm_threads;
 733    brw->urb.size = devinfo->urb.size;
 734    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 735    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 736    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 737
 738    /* Estimate the size of the mappable aperture into the GTT.  There's an
 739     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 740     * It turns out it's basically always 256MB, though some ancient hardware
 741     * was smaller.
 742     */
 743    uint32_t gtt_size = 256 * 1024 * 1024;
 744
 745    /* We don't want to map two objects such that a memcpy between them would
 746     * just fault one mapping in and then the other over and over forever.  So
 747     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 748     * taken up by things like the framebuffer and the ringbuffer and such, so
 749     * be more conservative.
 750     */
 751    brw->max_gtt_map_object_size = gtt_size / 4;
 752
 753    if (brw->gen == 6)
 754       brw->urb.gen6_gs_previously_active = false;
 755
 756    brw->prim_restart.in_progress = false;
 757    brw->prim_restart.enable_cut_index = false;
 758    brw->gs.enabled = false;
 759
 760    if (brw->gen < 6) {
 761       brw->curbe.last_buf = calloc(1, 4096);
 762       brw->curbe.next_buf = calloc(1, 4096);
 763    }
 764
 765    ctx->VertexProgram._MaintainTnlProgram = true;
 766    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 767
 768    brw_draw_init( brw );
 769
 770    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 771       /* Turn on some extra GL_ARB_debug_output generation. */
 772       brw->perf_debug = true;
 773    }
 774
 775    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 776       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 777
 778    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 779       brw_init_shader_time(brw);
 780
 781    _mesa_compute_version(ctx);
 782
 783    _mesa_initialize_dispatch_tables(ctx);
 784    _mesa_initialize_vbo_vtxfmt(ctx);
 785
 786    if (ctx->Extensions.AMD_performance_monitor) {
 787       brw_init_performance_monitors(brw);
 788    }
 789
 790    vbo_use_buffer_objects(ctx);
 791    vbo_always_unmap_buffers(ctx);
 792
 793    return true;
 794 }
 795
 796 void
 797 intelDestroyContext(__DRIcontext * driContextPriv)
 798 {
 799    struct brw_context *brw =
 800       (struct brw_context *) driContextPriv->driverPrivate;
 801    struct gl_context *ctx = &brw->ctx;
 802
 803    assert(brw); /* should never be null */
 804    if (!brw)
 805       return;
 806
 807    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 808    if (INTEL_DEBUG & DEBUG_AUB) {
 809       intel_batchbuffer_flush(brw);
 810       aub_dump_bmp(&brw->ctx);
 811    }
 812
 813    _mesa_meta_free(&brw->ctx);
 814
 815    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 816       /* Force a report. */
 817       brw->shader_time.report_time = 0;
 818
 819       brw_collect_and_report_shader_time(brw);
 820       brw_destroy_shader_time(brw);
 821    }
 822
 823    brw_destroy_state(brw);
 824    brw_draw_destroy(brw);
 825
 826    drm_intel_bo_unreference(brw->curbe.curbe_bo);
 827
 828    free(brw->curbe.last_buf);
 829    free(brw->curbe.next_buf);
 830
 831    drm_intel_gem_context_destroy(brw->hw_ctx);
 832
 833    if (ctx->swrast_context) {
 834       _swsetup_DestroyContext(&brw->ctx);
 835       _tnl_DestroyContext(&brw->ctx);
 836    }
 837    _vbo_DestroyContext(&brw->ctx);
 838
 839    if (ctx->swrast_context)
 840       _swrast_DestroyContext(&brw->ctx);
 841
 842    intel_batchbuffer_free(brw);
 843
 844    drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
 845    brw->first_post_swapbuffers_batch = NULL;
 846
 847    driDestroyOptionCache(&brw->optionCache);
 848
 849    /* free the Mesa context */
 850    _mesa_free_context_data(&brw->ctx);
 851
 852    ralloc_free(brw);
 853    driContextPriv->driverPrivate = NULL;
 854 }
 855
 856 GLboolean
 857 intelUnbindContext(__DRIcontext * driContextPriv)
 858 {
 859    /* Unset current context and dispath table */
 860    _mesa_make_current(NULL, NULL, NULL);
 861
 862    return true;
 863 }
 864
 865 /**
 866  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 867  * on window system framebuffers.
 868  *
 869  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 870  * your renderbuffer can do sRGB encode, and you can flip a switch that does
 871  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 872  * for a visual where you're guaranteed to be capable, but it turns out that
 873  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 874  * incapable ones, becuase there's no difference between the two in resources
 875  * used.  Applications thus get built that accidentally rely on the default
 876  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 877  * great...
 878  *
 879  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 880  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 881  * So they removed the enable knob and made it "if the renderbuffer is sRGB
 882  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 883  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 884  * and get no sRGB encode (assuming that both kinds of visual are available).
 885  * Thus our choice to support sRGB by default on our visuals for desktop would
 886  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 887  *
 888  * Unfortunately, renderbuffer setup happens before a context is created.  So
 889  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 890  * context (without an sRGB visual, though we don't have sRGB visuals exposed
 891  * yet), we go turn that back off before anyone finds out.
 892  */
 893 static void
 894 intel_gles3_srgb_workaround(struct brw_context *brw,
 895                             struct gl_framebuffer *fb)
 896 {
 897    struct gl_context *ctx = &brw->ctx;
 898
 899    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
 900       return;
 901
 902    /* Some day when we support the sRGB capable bit on visuals available for
 903     * GLES, we'll need to respect that and not disable things here.
 904     */
 905    fb->Visual.sRGBCapable = false;
 906    for (int i = 0; i < BUFFER_COUNT; i++) {
 907       if (fb->Attachment[i].Renderbuffer &&
 908           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
 909          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
 910       }
 911    }
 912 }
 913
 914 GLboolean
 915 intelMakeCurrent(__DRIcontext * driContextPriv,
 916                  __DRIdrawable * driDrawPriv,
 917                  __DRIdrawable * driReadPriv)
 918 {
 919    struct brw_context *brw;
 920    GET_CURRENT_CONTEXT(curCtx);
 921
 922    if (driContextPriv)
 923       brw = (struct brw_context *) driContextPriv->driverPrivate;
 924    else
 925       brw = NULL;
 926
 927    /* According to the glXMakeCurrent() man page: "Pending commands to
 928     * the previous context, if any, are flushed before it is released."
 929     * But only flush if we're actually changing contexts.
 930     */
 931    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
 932       _mesa_flush(curCtx);
 933    }
 934
 935    if (driContextPriv) {
 936       struct gl_context *ctx = &brw->ctx;
 937       struct gl_framebuffer *fb, *readFb;
 938
 939       if (driDrawPriv == NULL && driReadPriv == NULL) {
 940          fb = _mesa_get_incomplete_framebuffer();
 941          readFb = _mesa_get_incomplete_framebuffer();
 942       } else {
 943          fb = driDrawPriv->driverPrivate;
 944          readFb = driReadPriv->driverPrivate;
 945          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
 946          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
 947       }
 948
 949       /* The sRGB workaround changes the renderbuffer's format. We must change
 950        * the format before the renderbuffer's miptree get's allocated, otherwise
 951        * the formats of the renderbuffer and its miptree will differ.
 952        */
 953       intel_gles3_srgb_workaround(brw, fb);
 954       intel_gles3_srgb_workaround(brw, readFb);
 955
 956       /* If the context viewport hasn't been initialized, force a call out to
 957        * the loader to get buffers so we have a drawable size for the initial
 958        * viewport. */
 959       if (!brw->ctx.ViewportInitialized)
 960          intel_prepare_render(brw);
 961
 962       _mesa_make_current(ctx, fb, readFb);
 963    } else {
 964       _mesa_make_current(NULL, NULL, NULL);
 965    }
 966
 967    return true;
 968 }
 969
 970 void
 971 intel_resolve_for_dri2_flush(struct brw_context *brw,
 972                              __DRIdrawable *drawable)
 973 {
 974    if (brw->gen < 6) {
 975       /* MSAA and fast color clear are not supported, so don't waste time
 976        * checking whether a resolve is needed.
 977        */
 978       return;
 979    }
 980
 981    struct gl_framebuffer *fb = drawable->driverPrivate;
 982    struct intel_renderbuffer *rb;
 983
 984    /* Usually, only the back buffer will need to be downsampled. However,
 985     * the front buffer will also need it if the user has rendered into it.
 986     */
 987    static const gl_buffer_index buffers[2] = {
 988          BUFFER_BACK_LEFT,
 989          BUFFER_FRONT_LEFT,
 990    };
 991
 992    for (int i = 0; i < 2; ++i) {
 993       rb = intel_get_renderbuffer(fb, buffers[i]);
 994       if (rb == NULL || rb->mt == NULL)
 995          continue;
 996       if (rb->mt->num_samples <= 1)
 997          intel_miptree_resolve_color(brw, rb->mt);
 998       else
 999          intel_renderbuffer_downsample(brw, rb);
1000    }
1001 }
1002
/**
 * Size of one pixel of a renderbuffer's format, in bits.
 *
 * \param rb  Renderbuffer whose format is queried.
 * \return Bytes per pixel of the format multiplied by eight.
 */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   const int bytes_per_pixel = _mesa_get_format_bytes(intel_rb_format(rb));

   return bytes_per_pixel * 8;
}
1008
1009 static void
1010 intel_query_dri2_buffers(struct brw_context *brw,
1011                          __DRIdrawable *drawable,
1012                          __DRIbuffer **buffers,
1013                          int *count);
1014
1015 static void
1016 intel_process_dri2_buffer(struct brw_context *brw,
1017                           __DRIdrawable *drawable,
1018                           __DRIbuffer *buffer,
1019                           struct intel_renderbuffer *rb,
1020                           const char *buffer_name);
1021
1022 static void
1023 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1024
1025 static void
1026 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1027 {
1028    struct gl_framebuffer *fb = drawable->driverPrivate;
1029    struct intel_renderbuffer *rb;
1030    __DRIbuffer *buffers = NULL;
1031    int i, count;
1032    const char *region_name;
1033
1034    /* Set this up front, so that in case our buffers get invalidated
1035     * while we're getting new buffers, we don't clobber the stamp and
1036     * thus ignore the invalidate. */
1037    drawable->lastStamp = drawable->dri2.stamp;
1038
1039    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1040       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1041
1042    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1043
1044    if (buffers == NULL)
1045       return;
1046
1047    for (i = 0; i < count; i++) {
1048        switch (buffers[i].attachment) {
1049        case __DRI_BUFFER_FRONT_LEFT:
1050            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1051            region_name = "dri2 front buffer";
1052            break;
1053
1054        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1055            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1056            region_name = "dri2 fake front buffer";
1057            break;
1058
1059        case __DRI_BUFFER_BACK_LEFT:
1060            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1061            region_name = "dri2 back buffer";
1062            break;
1063
1064        case __DRI_BUFFER_DEPTH:
1065        case __DRI_BUFFER_HIZ:
1066        case __DRI_BUFFER_DEPTH_STENCIL:
1067        case __DRI_BUFFER_STENCIL:
1068        case __DRI_BUFFER_ACCUM:
1069        default:
1070            fprintf(stderr,
1071                    "unhandled buffer attach event, attachment type %d\n",
1072                    buffers[i].attachment);
1073            return;
1074        }
1075
1076        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1077    }
1078
1079 }
1080
1081 void
1082 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1083 {
1084    struct brw_context *brw = context->driverPrivate;
1085    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1086
1087    /* Set this up front, so that in case our buffers get invalidated
1088     * while we're getting new buffers, we don't clobber the stamp and
1089     * thus ignore the invalidate. */
1090    drawable->lastStamp = drawable->dri2.stamp;
1091
1092    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1093       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1094
1095    if (screen->image.loader)
1096       intel_update_image_buffers(brw, drawable);
1097    else
1098       intel_update_dri2_buffers(brw, drawable);
1099
1100    driUpdateFramebufferSize(&brw->ctx, drawable);
1101 }
1102
1103 /**
1104  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1105  * state is required.
1106  */
1107 void
1108 intel_prepare_render(struct brw_context *brw)
1109 {
1110    struct gl_context *ctx = &brw->ctx;
1111    __DRIcontext *driContext = brw->driContext;
1112    __DRIdrawable *drawable;
1113
1114    drawable = driContext->driDrawablePriv;
1115    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1116       if (drawable->lastStamp != drawable->dri2.stamp)
1117          intel_update_renderbuffers(driContext, drawable);
1118       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1119    }
1120
1121    drawable = driContext->driReadablePriv;
1122    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1123       if (drawable->lastStamp != drawable->dri2.stamp)
1124          intel_update_renderbuffers(driContext, drawable);
1125       driContext->dri2.read_stamp = drawable->dri2.stamp;
1126    }
1127
1128    /* If we're currently rendering to the front buffer, the rendering
1129     * that will happen next will probably dirty the front buffer.  So
1130     * mark it as dirty here.
1131     */
1132    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
1133       brw->front_buffer_dirty = true;
1134
1135    /* Wait for the swapbuffers before the one we just emitted, so we
1136     * don't get too many swaps outstanding for apps that are GPU-heavy
1137     * but not CPU-heavy.
1138     *
1139     * We're using intelDRI2Flush (called from the loader before
1140     * swapbuffer) and glFlush (for front buffer rendering) as the
1141     * indicator that a frame is done and then throttle when we get
1142     * here as we prepare to render the next frame.  At this point for
1143     * round trips for swap/copy and getting new buffers are done and
1144     * we'll spend less time waiting on the GPU.
1145     *
1146     * Unfortunately, we don't have a handle to the batch containing
1147     * the swap, and getting our hands on that doesn't seem worth it,
1148     * so we just us the first batch we emitted after the last swap.
1149     */
1150    if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
1151       if (!brw->disable_throttling)
1152          drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
1153       drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
1154       brw->first_post_swapbuffers_batch = NULL;
1155       brw->need_throttle = false;
1156    }
1157 }
1158
1159 /**
1160  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1161  *
1162  * To determine which DRI buffers to request, examine the renderbuffers
1163  * attached to the drawable's framebuffer. Then request the buffers with
1164  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1165  *
1166  * This is called from intel_update_renderbuffers().
1167  *
1168  * \param drawable      Drawable whose buffers are queried.
1169  * \param buffers       [out] List of buffers returned by DRI2 query.
1170  * \param buffer_count  [out] Number of buffers returned.
1171  *
1172  * \see intel_update_renderbuffers()
1173  * \see DRI2GetBuffers()
1174  * \see DRI2GetBuffersWithFormat()
1175  */
1176 static void
1177 intel_query_dri2_buffers(struct brw_context *brw,
1178                          __DRIdrawable *drawable,
1179                          __DRIbuffer **buffers,
1180                          int *buffer_count)
1181 {
1182    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1183    struct gl_framebuffer *fb = drawable->driverPrivate;
1184    int i = 0;
1185    unsigned attachments[8];
1186
1187    struct intel_renderbuffer *front_rb;
1188    struct intel_renderbuffer *back_rb;
1189
1190    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1191    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1192
1193    memset(attachments, 0, sizeof(attachments));
1194    if ((brw_is_front_buffer_drawing(fb) ||
1195         brw_is_front_buffer_reading(fb) ||
1196         !back_rb) && front_rb) {
1197       /* If a fake front buffer is in use, then querying for
1198        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1199        * the real front buffer to the fake front buffer.  So before doing the
1200        * query, we need to make sure all the pending drawing has landed in the
1201        * real front buffer.
1202        */
1203       intel_batchbuffer_flush(brw);
1204       intel_flush_front(&brw->ctx);
1205
1206       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1207       attachments[i++] = intel_bits_per_pixel(front_rb);
1208    } else if (front_rb && brw->front_buffer_dirty) {
1209       /* We have pending front buffer rendering, but we aren't querying for a
1210        * front buffer.  If the front buffer we have is a fake front buffer,
1211        * the X server is going to throw it away when it processes the query.
1212        * So before doing the query, make sure all the pending drawing has
1213        * landed in the real front buffer.
1214        */
1215       intel_batchbuffer_flush(brw);
1216       intel_flush_front(&brw->ctx);
1217    }
1218
1219    if (back_rb) {
1220       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1221       attachments[i++] = intel_bits_per_pixel(back_rb);
1222    }
1223
1224    assert(i <= ARRAY_SIZE(attachments));
1225
1226    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1227                                                         &drawable->w,
1228                                                         &drawable->h,
1229                                                         attachments, i / 2,
1230                                                         buffer_count,
1231                                                         drawable->loaderPrivate);
1232 }
1233
1234 /**
1235  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1236  *
1237  * This is called from intel_update_renderbuffers().
1238  *
1239  * \par Note:
1240  *    DRI buffers whose attachment point is DRI2BufferStencil or
1241  *    DRI2BufferDepthStencil are handled as special cases.
1242  *
1243  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1244  *        that is passed to drm_intel_bo_gem_create_from_name().
1245  *
1246  * \see intel_update_renderbuffers()
1247  */
1248 static void
1249 intel_process_dri2_buffer(struct brw_context *brw,
1250                           __DRIdrawable *drawable,
1251                           __DRIbuffer *buffer,
1252                           struct intel_renderbuffer *rb,
1253                           const char *buffer_name)
1254 {
1255    struct gl_framebuffer *fb = drawable->driverPrivate;
1256    drm_intel_bo *bo;
1257
1258    if (!rb)
1259       return;
1260
1261    unsigned num_samples = rb->Base.Base.NumSamples;
1262
1263    /* We try to avoid closing and reopening the same BO name, because the first
1264     * use of a mapping of the buffer involves a bunch of page faulting which is
1265     * moderately expensive.
1266     */
1267    struct intel_mipmap_tree *last_mt;
1268    if (num_samples == 0)
1269       last_mt = rb->mt;
1270    else
1271       last_mt = rb->singlesample_mt;
1272
1273    uint32_t old_name = 0;
1274    if (last_mt) {
1275        /* The bo already has a name because the miptree was created by a
1276         * previous call to intel_process_dri2_buffer(). If a bo already has a
1277         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1278         * create a new name.
1279         */
1280       drm_intel_bo_flink(last_mt->bo, &old_name);
1281    }
1282
1283    if (old_name == buffer->name)
1284       return;
1285
1286    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1287       fprintf(stderr,
1288               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1289               buffer->name, buffer->attachment,
1290               buffer->cpp, buffer->pitch);
1291    }
1292
1293    intel_miptree_release(&rb->mt);
1294    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1295                                           buffer->name);
1296    if (!bo) {
1297       fprintf(stderr,
1298               "Failed to open BO for returned DRI2 buffer "
1299               "(%dx%d, %s, named %d).\n"
1300               "This is likely a bug in the X Server that will lead to a "
1301               "crash soon.\n",
1302               drawable->w, drawable->h, buffer_name, buffer->name);
1303       return;
1304    }
1305
1306    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1307                                             drawable->w, drawable->h,
1308                                             buffer->pitch);
1309
1310    if (brw_is_front_buffer_drawing(fb) &&
1311        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1312         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1313        rb->Base.Base.NumSamples > 1) {
1314       intel_renderbuffer_upsample(brw, rb);
1315    }
1316
1317    assert(rb->mt);
1318
1319    drm_intel_bo_unreference(bo);
1320 }
1321
1322 /**
1323  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1324  *
1325  * To determine which DRI buffers to request, examine the renderbuffers
1326  * attached to the drawable's framebuffer. Then request the buffers from
1327  * the image loader
1328  *
1329  * This is called from intel_update_renderbuffers().
1330  *
1331  * \param drawable      Drawable whose buffers are queried.
1332  * \param buffers       [out] List of buffers returned by DRI2 query.
1333  * \param buffer_count  [out] Number of buffers returned.
1334  *
1335  * \see intel_update_renderbuffers()
1336  */
1337
1338 static void
1339 intel_update_image_buffer(struct brw_context *intel,
1340                           __DRIdrawable *drawable,
1341                           struct intel_renderbuffer *rb,
1342                           __DRIimage *buffer,
1343                           enum __DRIimageBufferMask buffer_type)
1344 {
1345    struct gl_framebuffer *fb = drawable->driverPrivate;
1346
1347    if (!rb || !buffer->bo)
1348       return;
1349
1350    unsigned num_samples = rb->Base.Base.NumSamples;
1351
1352    /* Check and see if we're already bound to the right
1353     * buffer object
1354     */
1355    struct intel_mipmap_tree *last_mt;
1356    if (num_samples == 0)
1357       last_mt = rb->mt;
1358    else
1359       last_mt = rb->singlesample_mt;
1360
1361    if (last_mt && last_mt->bo == buffer->bo)
1362       return;
1363
1364    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1365                                             buffer->width, buffer->height,
1366                                             buffer->pitch);
1367
1368    if (brw_is_front_buffer_drawing(fb) &&
1369        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1370        rb->Base.Base.NumSamples > 1) {
1371       intel_renderbuffer_upsample(intel, rb);
1372    }
1373 }
1374
1375 static void
1376 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1377 {
1378    struct gl_framebuffer *fb = drawable->driverPrivate;
1379    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1380    struct intel_renderbuffer *front_rb;
1381    struct intel_renderbuffer *back_rb;
1382    struct __DRIimageList images;
1383    unsigned int format;
1384    uint32_t buffer_mask = 0;
1385
1386    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1387    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1388
1389    if (back_rb)
1390       format = intel_rb_format(back_rb);
1391    else if (front_rb)
1392       format = intel_rb_format(front_rb);
1393    else
1394       return;
1395
1396    if (front_rb && (brw_is_front_buffer_drawing(fb) ||
1397                     brw_is_front_buffer_reading(fb) || !back_rb)) {
1398       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1399    }
1400
1401    if (back_rb)
1402       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1403
1404    (*screen->image.loader->getBuffers) (drawable,
1405                                         driGLFormatToImageFormat(format),
1406                                         &drawable->dri2.stamp,
1407                                         drawable->loaderPrivate,
1408                                         buffer_mask,
1409                                         &images);
1410
1411    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1412       drawable->w = images.front->width;
1413       drawable->h = images.front->height;
1414       intel_update_image_buffer(brw,
1415                                 drawable,
1416                                 front_rb,
1417                                 images.front,
1418                                 __DRI_IMAGE_BUFFER_FRONT);
1419    }
1420    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1421       drawable->w = images.back->width;
1422       drawable->h = images.back->height;
1423       intel_update_image_buffer(brw,
1424                                 drawable,
1425                                 back_rb,
1426                                 images.back,
1427                                 __DRI_IMAGE_BUFFER_BACK);
1428    }
1429 }