src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/imports.h"
  38 #include "main/macros.h"
  39 #include "main/points.h"
  40 #include "main/version.h"
  41 #include "main/vtxfmt.h"
  42
  43 #include "vbo/vbo_context.h"
  44
  45 #include "drivers/common/driverfuncs.h"
  46 #include "drivers/common/meta.h"
  47 #include "utils.h"
  48
  49 #include "brw_context.h"
  50 #include "brw_defines.h"
  51 #include "brw_draw.h"
  52 #include "brw_state.h"
  53
  54 #include "intel_batchbuffer.h"
  55 #include "intel_buffer_objects.h"
  56 #include "intel_buffers.h"
  57 #include "intel_fbo.h"
  58 #include "intel_mipmap_tree.h"
  59 #include "intel_pixel.h"
  60 #include "intel_regions.h"
  61 #include "intel_tex.h"
  62 #include "intel_tex_obj.h"
  63
  64 #include "swrast_setup/swrast_setup.h"
  65 #include "tnl/tnl.h"
  66 #include "tnl/t_pipeline.h"
  67 #include "glsl/ralloc.h"
  68
  69 /***************************************
  70  * Mesa's Driver Functions
  71  ***************************************/
  72
  73 static size_t
  74 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  75                              GLenum internalFormat, int samples[16])
  76 {
  77    struct brw_context *brw = brw_context(ctx);
  78
  79    (void) target;
  80
  81    switch (brw->gen) {
  82    case 8:
  83       samples[0] = 8;
  84       samples[1] = 4;
  85       samples[2] = 2;
  86       return 3;
  87
  88    case 7:
  89       samples[0] = 8;
  90       samples[1] = 4;
  91       return 2;
  92
  93    case 6:
  94       samples[0] = 4;
  95       return 1;
  96
  97    default:
  98       samples[0] = 1;
  99       return 1;
 100    }
 101 }
 102
/* Vendor name; intelGetString() returns this for GL_VENDOR. */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
 104
/**
 * Look up the renderer string for a PCI device ID.
 *
 * The chipset name comes from the CHIPSET() entries in
 * pci_ids/i965_pci_ids.h; IDs not listed there map to
 * "Unknown Intel Chipset".
 *
 * The result lives in a function-local static buffer, so every call returns
 * the same pointer and a later call overwrites the text of an earlier one.
 */
const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
   /* Each CHIPSET(id, symbol, str) entry in the included header expands to
    * a case label that selects that entry's name string.
    */
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   /* Presumably formats the final renderer string into buffer (helper is
    * defined elsewhere); its return value is deliberately ignored.
    */
   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}
 123
 124 static const GLubyte *
 125 intelGetString(struct gl_context * ctx, GLenum name)
 126 {
 127    const struct brw_context *const brw = brw_context(ctx);
 128
 129    switch (name) {
 130    case GL_VENDOR:
 131       return (GLubyte *) brw_vendor_string;
 132
 133    case GL_RENDERER:
 134       return
 135          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 136
 137    default:
 138       return NULL;
 139    }
 140 }
 141
 142 static void
 143 intel_viewport(struct gl_context *ctx)
 144 {
 145    struct brw_context *brw = brw_context(ctx);
 146    __DRIcontext *driContext = brw->driContext;
 147
 148    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 149       dri2InvalidateDrawable(driContext->driDrawablePriv);
 150       dri2InvalidateDrawable(driContext->driReadablePriv);
 151    }
 152 }
 153
 154 static void
 155 intelInvalidateState(struct gl_context * ctx, GLuint new_state)
 156 {
 157    struct brw_context *brw = brw_context(ctx);
 158
 159    if (ctx->swrast_context)
 160       _swrast_InvalidateState(ctx, new_state);
 161    _vbo_InvalidateState(ctx, new_state);
 162
 163    brw->NewGLState |= new_state;
 164 }
 165
/* Select the loader's flushFrontBuffer entry point: the image loader's when
 * the screen has one, otherwise the DRI2 loader's.
 */
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 167
 168 static void
 169 intel_flush_front(struct gl_context *ctx)
 170 {
 171    struct brw_context *brw = brw_context(ctx);
 172    __DRIcontext *driContext = brw->driContext;
 173    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 174    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 175
 176    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 177       if (flushFront(screen) && driDrawable &&
 178           driDrawable->loaderPrivate) {
 179
 180          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 181           *
 182           * This potentially resolves both front and back buffer. It
 183           * is unnecessary to resolve the back, but harms nothing except
 184           * performance. And no one cares about front-buffer render
 185           * performance.
 186           */
 187          intel_resolve_for_dri2_flush(brw, driDrawable);
 188          intel_batchbuffer_flush(brw);
 189
 190          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 191
 192          /* We set the dirty bit in intel_prepare_render() if we're
 193           * front buffer rendering once we get there.
 194           */
 195          brw->front_buffer_dirty = false;
 196       }
 197    }
 198 }
 199
 200 static void
 201 intel_glFlush(struct gl_context *ctx)
 202 {
 203    struct brw_context *brw = brw_context(ctx);
 204
 205    intel_batchbuffer_flush(brw);
 206    intel_flush_front(ctx);
 207    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
 208       brw->need_throttle = true;
 209 }
 210
 211 void
 212 intelFinish(struct gl_context * ctx)
 213 {
 214    struct brw_context *brw = brw_context(ctx);
 215
 216    intel_glFlush(ctx);
 217
 218    if (brw->batch.last_bo)
 219       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 220 }
 221
/**
 * Populate \p functions with the driver's dd_function_table hooks.
 *
 * Calls _mesa_init_driver_functions() first and then installs the i965
 * entry points.  Query-object, transform-feedback and sample-position hooks
 * are chosen according to brw->gen.
 *
 * NOTE(review): statement order is preserved deliberately; the init helpers
 * are defined elsewhere and a later one may overwrite entries set earlier.
 */
static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intelFinish;
   functions->GetString = intelGetString;
   functions->UpdateState = intelInvalidateState;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   /* Common query-object hooks first, then the generation-specific set. */
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   /* Only the gen7+ branch installs Pause/Resume hooks; the other branch
    * leaves those two entries as they were.
    */
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}
 280
/**
 * Fill in ctx->Const limits and the per-stage shader compiler options.
 *
 * The values chosen depend on brw->gen, brw->is_haswell, brw->is_g4x,
 * whether the INTEL_COMPUTE_SHADER environment variable is set, and the
 * "clamp_max_samples" option read from brw->optionCache.
 */
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* Gen8+ and Haswell get BRW_MAX_TEX_UNIT samplers per stage; everything
    * else gets 16.
    */
   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
   /* Geometry-stage texture units are only exposed on gen7+. */
   if (brw->gen >= 7)
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
   else
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
   /* Compute-stage texture units are exposed only when the
    * INTEL_COMPUTE_SHADER environment variable is set (any value).
    *
    * NOTE(review): MaxUniformBufferBindings is incremented relative to the
    * value it already holds; the reason for 12 is not visible here.
    */
   if (getenv("INTEL_COMPUTE_SHADER")) {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
      ctx->Const.MaxUniformBufferBindings += 12;
   } else {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
   }
   /* The combined limit is the sum over the four stages set above. */
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   /* Never exceed the core Mesa compile-time limit. */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxTextureMbytes = 1536;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   /* Pick the maximum sample count.  The loop below treats msaa_modes as a
    * 0-terminated list and stops at the first entry that fits, so it
    * presumably is sorted largest-first (TODO confirm against
    * intel_supported_msaa_modes()).
    */
   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      /* A negative option value means no clamp: use the first mode. */
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;

   /* Below gen6 MaxProgramTextureGatherComponents is left untouched. */
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   ctx->Const.MaxLineWidth = 5.0;
   ctx->Const.MaxLineWidthAA = 5.0;
   ctx->Const.LineWidthGranularity = 0.5;

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   /* Other hardware keeps whatever MaxClipPlanes value is already set. */
   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   /* Clamp the existing MaxEnvParams to the native parameter limit. */
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   /* Same clamp as for the vertex stage. */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   /* Atomic counter limits are only set on gen7+. */
   if (brw->gen >= 7) {
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
   }

   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
    * but we're not sure how it's actually done for vertex order,
    * that affect provoking vertex decision. Always use last vertex
    * convention for quad primitive which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.UniformBooleanTrue = 1;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;
   ctx->Const.TextureBufferOffsetAlignment = 16;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      /* If-depth is capped at 16 before gen6 and unlimited from gen6 on. */
      ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
      ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
      ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
      ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
      ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
      /* These two options are enabled for the fragment stage only. */
      ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput =
         (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
         (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
      ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
   }

   ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
   ctx->ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;

   /* ARB_viewport_array */
   if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }
}
 510
 511 /**
 512  * Process driconf (drirc) options, setting appropriate context flags.
 513  *
 514  * intelInitExtensions still pokes at optionCache directly, in order to
 515  * avoid advertising various extensions.  No flags are set, so it makes
 516  * sense to continue doing that there.
 517  */
 518 static void
 519 brw_process_driconf_options(struct brw_context *brw)
 520 {
 521    struct gl_context *ctx = &brw->ctx;
 522
 523    driOptionCache *options = &brw->optionCache;
 524    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 525                        brw->driContext->driScreenPriv->myNum, "i965");
 526
 527    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 528    switch (bo_reuse_mode) {
 529    case DRI_CONF_BO_REUSE_DISABLED:
 530       break;
 531    case DRI_CONF_BO_REUSE_ALL:
 532       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 533       break;
 534    }
 535
 536    if (!driQueryOptionb(options, "hiz")) {
 537        brw->has_hiz = false;
 538        /* On gen6, you can only do separate stencil with HIZ. */
 539        if (brw->gen == 6)
 540           brw->has_separate_stencil = false;
 541    }
 542
 543    if (driQueryOptionb(options, "always_flush_batch")) {
 544       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 545       brw->always_flush_batch = true;
 546    }
 547
 548    if (driQueryOptionb(options, "always_flush_cache")) {
 549       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 550       brw->always_flush_cache = true;
 551    }
 552
 553    if (driQueryOptionb(options, "disable_throttling")) {
 554       fprintf(stderr, "disabling flush throttling\n");
 555       brw->disable_throttling = true;
 556    }
 557
 558    brw->disable_derivative_optimization =
 559       driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
 560
 561    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 562
 563    ctx->Const.ForceGLSLExtensionsWarn =
 564       driQueryOptionb(options, "force_glsl_extensions_warn");
 565
 566    ctx->Const.DisableGLSLLineContinuations =
 567       driQueryOptionb(options, "disable_glsl_line_continuations");
 568 }
 569
 570 GLboolean
 571 brwCreateContext(gl_api api,
 572                  const struct gl_config *mesaVis,
 573                  __DRIcontext *driContextPriv,
 574                  unsigned major_version,
 575                  unsigned minor_version,
 576                  uint32_t flags,
 577                  bool notify_reset,
 578                  unsigned *dri_ctx_error,
 579                  void *sharedContextPrivate)
 580 {
 581    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 582    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 583    struct intel_screen *screen = sPriv->driverPrivate;
 584    const struct brw_device_info *devinfo = screen->devinfo;
 585    struct dd_function_table functions;
 586
 587    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 588     * provides us with context reset notifications.
 589     */
 590    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 591       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 592
 593    if (screen->has_context_reset_notification)
 594       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 595
 596    if (flags & ~allowed_flags) {
 597       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 598       return false;
 599    }
 600
 601    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 602    if (!brw) {
 603       fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
 604       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 605       return false;
 606    }
 607
 608    driContextPriv->driverPrivate = brw;
 609    brw->driContext = driContextPriv;
 610    brw->intelScreen = screen;
 611    brw->bufmgr = screen->bufmgr;
 612
 613    brw->gen = devinfo->gen;
 614    brw->gt = devinfo->gt;
 615    brw->is_g4x = devinfo->is_g4x;
 616    brw->is_baytrail = devinfo->is_baytrail;
 617    brw->is_haswell = devinfo->is_haswell;
 618    brw->has_llc = devinfo->has_llc;
 619    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 620    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 621    brw->has_pln = devinfo->has_pln;
 622    brw->has_compr4 = devinfo->has_compr4;
 623    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 624    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 625    brw->needs_unlit_centroid_workaround =
 626       devinfo->needs_unlit_centroid_workaround;
 627
 628    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 629    brw->has_swizzling = screen->hw_has_swizzling;
 630
 631    if (brw->gen >= 8) {
 632       gen8_init_vtable_surface_functions(brw);
 633       gen7_init_vtable_sampler_functions(brw);
 634       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 635    } else if (brw->gen >= 7) {
 636       gen7_init_vtable_surface_functions(brw);
 637       gen7_init_vtable_sampler_functions(brw);
 638       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 639    } else {
 640       gen4_init_vtable_surface_functions(brw);
 641       gen4_init_vtable_sampler_functions(brw);
 642       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 643    }
 644
 645    brw_init_driver_functions(brw, &functions);
 646
 647    if (notify_reset)
 648       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 649
 650    struct gl_context *ctx = &brw->ctx;
 651
 652    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 653       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 654       fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
 655       intelDestroyContext(driContextPriv);
 656       return false;
 657    }
 658
 659    driContextSetFlags(ctx, flags);
 660
 661    /* Initialize the software rasterizer and helper modules.
 662     *
 663     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 664     * software fallbacks (which we have to support on legacy GL to do weird
 665     * glDrawPixels(), glBitmap(), and other functions).
 666     */
 667    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 668       _swrast_CreateContext(ctx);
 669    }
 670
 671    _vbo_CreateContext(ctx);
 672    if (ctx->swrast_context) {
 673       _tnl_CreateContext(ctx);
 674       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 675       _swsetup_CreateContext(ctx);
 676
 677       /* Configure swrast to match hardware characteristics: */
 678       _swrast_allow_pixel_fog(ctx, false);
 679       _swrast_allow_vertex_fog(ctx, true);
 680    }
 681
 682    _mesa_meta_init(ctx);
 683
 684    brw_process_driconf_options(brw);
 685    brw_process_intel_debug_variable(brw);
 686    brw_initialize_context_constants(brw);
 687
 688    ctx->Const.ResetStrategy = notify_reset
 689       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 690
 691    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 692    _mesa_init_point(ctx);
 693
 694    intel_fbo_init(brw);
 695
 696    intel_batchbuffer_init(brw);
 697
 698    if (brw->gen >= 6) {
 699       /* Create a new hardware context.  Using a hardware context means that
 700        * our GPU state will be saved/restored on context switch, allowing us
 701        * to assume that the GPU is in the same state we left it in.
 702        *
 703        * This is required for transform feedback buffer offsets, query objects,
 704        * and also allows us to reduce how much state we have to emit.
 705        */
 706       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 707
 708       if (!brw->hw_ctx) {
 709          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 710          intelDestroyContext(driContextPriv);
 711          return false;
 712       }
 713    }
 714
 715    brw_init_state(brw);
 716
 717    intelInitExtensions(ctx);
 718
 719    brw_init_surface_formats(brw);
 720
 721    if (brw->is_g4x || brw->gen >= 5) {
 722       brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
 723       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
 724   } else {
 725       brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
 726       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
 727    }
 728
 729    brw->max_vs_threads = devinfo->max_vs_threads;
 730    brw->max_gs_threads = devinfo->max_gs_threads;
 731    brw->max_wm_threads = devinfo->max_wm_threads;
 732    brw->urb.size = devinfo->urb.size;
 733    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 734    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 735    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 736
 737    /* Estimate the size of the mappable aperture into the GTT.  There's an
 738     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 739     * It turns out it's basically always 256MB, though some ancient hardware
 740     * was smaller.
 741     */
 742    uint32_t gtt_size = 256 * 1024 * 1024;
 743
 744    /* We don't want to map two objects such that a memcpy between them would
 745     * just fault one mapping in and then the other over and over forever.  So
 746     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 747     * taken up by things like the framebuffer and the ringbuffer and such, so
 748     * be more conservative.
 749     */
 750    brw->max_gtt_map_object_size = gtt_size / 4;
 751
 752    if (brw->gen == 6)
 753       brw->urb.gen6_gs_previously_active = false;
 754
 755    brw->prim_restart.in_progress = false;
 756    brw->prim_restart.enable_cut_index = false;
 757    brw->gs.enabled = false;
 758
 759    if (brw->gen < 6) {
 760       brw->curbe.last_buf = calloc(1, 4096);
 761       brw->curbe.next_buf = calloc(1, 4096);
 762    }
 763
 764    ctx->VertexProgram._MaintainTnlProgram = true;
 765    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 766
 767    brw_draw_init( brw );
 768
 769    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 770       /* Turn on some extra GL_ARB_debug_output generation. */
 771       brw->perf_debug = true;
 772    }
 773
 774    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 775       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 776
 777    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 778       brw_init_shader_time(brw);
 779
 780    _mesa_compute_version(ctx);
 781
 782    _mesa_initialize_dispatch_tables(ctx);
 783    _mesa_initialize_vbo_vtxfmt(ctx);
 784
 785    if (ctx->Extensions.AMD_performance_monitor) {
 786       brw_init_performance_monitors(brw);
 787    }
 788
 789    return true;
 790 }
 791
 792 void
 793 intelDestroyContext(__DRIcontext * driContextPriv)
 794 {
 795    struct brw_context *brw =
 796       (struct brw_context *) driContextPriv->driverPrivate;
 797    struct gl_context *ctx = &brw->ctx;
 798
 799    assert(brw); /* should never be null */
 800    if (!brw)
 801       return;
 802
 803    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 804    if (INTEL_DEBUG & DEBUG_AUB) {
 805       intel_batchbuffer_flush(brw);
 806       aub_dump_bmp(&brw->ctx);
 807    }
 808
 809    _mesa_meta_free(&brw->ctx);
 810
 811    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 812       /* Force a report. */
 813       brw->shader_time.report_time = 0;
 814
 815       brw_collect_and_report_shader_time(brw);
 816       brw_destroy_shader_time(brw);
 817    }
 818
 819    brw_destroy_state(brw);
 820    brw_draw_destroy(brw);
 821
 822    drm_intel_bo_unreference(brw->curbe.curbe_bo);
 823
 824    free(brw->curbe.last_buf);
 825    free(brw->curbe.next_buf);
 826
 827    drm_intel_gem_context_destroy(brw->hw_ctx);
 828
 829    if (ctx->swrast_context) {
 830       _swsetup_DestroyContext(&brw->ctx);
 831       _tnl_DestroyContext(&brw->ctx);
 832    }
 833    _vbo_DestroyContext(&brw->ctx);
 834
 835    if (ctx->swrast_context)
 836       _swrast_DestroyContext(&brw->ctx);
 837
 838    intel_batchbuffer_free(brw);
 839
 840    drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
 841    brw->first_post_swapbuffers_batch = NULL;
 842
 843    driDestroyOptionCache(&brw->optionCache);
 844
 845    /* free the Mesa context */
 846    _mesa_free_context_data(&brw->ctx);
 847
 848    ralloc_free(brw);
 849    driContextPriv->driverPrivate = NULL;
 850 }
 851
 852 GLboolean
 853 intelUnbindContext(__DRIcontext * driContextPriv)
 854 {
 855    /* Unset current context and dispath table */
 856    _mesa_make_current(NULL, NULL, NULL);
 857
 858    return true;
 859 }
 860
 861 /**
 862  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 863  * on window system framebuffers.
 864  *
 865  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 866  * your renderbuffer can do sRGB encode, and you can flip a switch that does
 867  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 868  * for a visual where you're guaranteed to be capable, but it turns out that
 869  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 870  * incapable ones, becuase there's no difference between the two in resources
 871  * used.  Applications thus get built that accidentally rely on the default
 872  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 873  * great...
 874  *
 875  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 876  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 877  * So they removed the enable knob and made it "if the renderbuffer is sRGB
 878  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 879  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 880  * and get no sRGB encode (assuming that both kinds of visual are available).
 881  * Thus our choice to support sRGB by default on our visuals for desktop would
 882  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 883  *
 884  * Unfortunately, renderbuffer setup happens before a context is created.  So
 885  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 886  * context (without an sRGB visual, though we don't have sRGB visuals exposed
 887  * yet), we go turn that back off before anyone finds out.
 888  */
 889 static void
 890 intel_gles3_srgb_workaround(struct brw_context *brw,
 891                             struct gl_framebuffer *fb)
 892 {
 893    struct gl_context *ctx = &brw->ctx;
 894
 895    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
 896       return;
 897
 898    /* Some day when we support the sRGB capable bit on visuals available for
 899     * GLES, we'll need to respect that and not disable things here.
 900     */
 901    fb->Visual.sRGBCapable = false;
 902    for (int i = 0; i < BUFFER_COUNT; i++) {
 903       if (fb->Attachment[i].Renderbuffer &&
 904           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
 905          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
 906       }
 907    }
 908 }
 909
 910 GLboolean
 911 intelMakeCurrent(__DRIcontext * driContextPriv,
 912                  __DRIdrawable * driDrawPriv,
 913                  __DRIdrawable * driReadPriv)
 914 {
 915    struct brw_context *brw;
 916    GET_CURRENT_CONTEXT(curCtx);
 917
 918    if (driContextPriv)
 919       brw = (struct brw_context *) driContextPriv->driverPrivate;
 920    else
 921       brw = NULL;
 922
 923    /* According to the glXMakeCurrent() man page: "Pending commands to
 924     * the previous context, if any, are flushed before it is released."
 925     * But only flush if we're actually changing contexts.
 926     */
 927    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
 928       _mesa_flush(curCtx);
 929    }
 930
 931    if (driContextPriv) {
 932       struct gl_context *ctx = &brw->ctx;
 933       struct gl_framebuffer *fb, *readFb;
 934
 935       if (driDrawPriv == NULL && driReadPriv == NULL) {
 936          fb = _mesa_get_incomplete_framebuffer();
 937          readFb = _mesa_get_incomplete_framebuffer();
 938       } else {
 939          fb = driDrawPriv->driverPrivate;
 940          readFb = driReadPriv->driverPrivate;
 941          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
 942          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
 943       }
 944
 945       /* The sRGB workaround changes the renderbuffer's format. We must change
 946        * the format before the renderbuffer's miptree get's allocated, otherwise
 947        * the formats of the renderbuffer and its miptree will differ.
 948        */
 949       intel_gles3_srgb_workaround(brw, fb);
 950       intel_gles3_srgb_workaround(brw, readFb);
 951
 952       /* If the context viewport hasn't been initialized, force a call out to
 953        * the loader to get buffers so we have a drawable size for the initial
 954        * viewport. */
 955       if (!brw->ctx.ViewportInitialized)
 956          intel_prepare_render(brw);
 957
 958       _mesa_make_current(ctx, fb, readFb);
 959    } else {
 960       _mesa_make_current(NULL, NULL, NULL);
 961    }
 962
 963    return true;
 964 }
 965
 966 void
 967 intel_resolve_for_dri2_flush(struct brw_context *brw,
 968                              __DRIdrawable *drawable)
 969 {
 970    if (brw->gen < 6) {
 971       /* MSAA and fast color clear are not supported, so don't waste time
 972        * checking whether a resolve is needed.
 973        */
 974       return;
 975    }
 976
 977    struct gl_framebuffer *fb = drawable->driverPrivate;
 978    struct intel_renderbuffer *rb;
 979
 980    /* Usually, only the back buffer will need to be downsampled. However,
 981     * the front buffer will also need it if the user has rendered into it.
 982     */
 983    static const gl_buffer_index buffers[2] = {
 984          BUFFER_BACK_LEFT,
 985          BUFFER_FRONT_LEFT,
 986    };
 987
 988    for (int i = 0; i < 2; ++i) {
 989       rb = intel_get_renderbuffer(fb, buffers[i]);
 990       if (rb == NULL || rb->mt == NULL)
 991          continue;
 992       if (rb->mt->num_samples <= 1)
 993          intel_miptree_resolve_color(brw, rb->mt);
 994       else
 995          intel_renderbuffer_downsample(brw, rb);
 996    }
 997 }
 998
 /**
  * Size of one pixel of the renderbuffer's format, in bits (the format's
  * byte size times eight).
  */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return 8 * _mesa_get_format_bytes(intel_rb_format(rb));
}
1004
1005 static void
1006 intel_query_dri2_buffers(struct brw_context *brw,
1007                          __DRIdrawable *drawable,
1008                          __DRIbuffer **buffers,
1009                          int *count);
1010
1011 static void
1012 intel_process_dri2_buffer(struct brw_context *brw,
1013                           __DRIdrawable *drawable,
1014                           __DRIbuffer *buffer,
1015                           struct intel_renderbuffer *rb,
1016                           const char *buffer_name);
1017
1018 static void
1019 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1020
1021 static void
1022 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1023 {
1024    struct gl_framebuffer *fb = drawable->driverPrivate;
1025    struct intel_renderbuffer *rb;
1026    __DRIbuffer *buffers = NULL;
1027    int i, count;
1028    const char *region_name;
1029
1030    /* Set this up front, so that in case our buffers get invalidated
1031     * while we're getting new buffers, we don't clobber the stamp and
1032     * thus ignore the invalidate. */
1033    drawable->lastStamp = drawable->dri2.stamp;
1034
1035    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1036       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1037
1038    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1039
1040    if (buffers == NULL)
1041       return;
1042
1043    for (i = 0; i < count; i++) {
1044        switch (buffers[i].attachment) {
1045        case __DRI_BUFFER_FRONT_LEFT:
1046            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1047            region_name = "dri2 front buffer";
1048            break;
1049
1050        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1051            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1052            region_name = "dri2 fake front buffer";
1053            break;
1054
1055        case __DRI_BUFFER_BACK_LEFT:
1056            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1057            region_name = "dri2 back buffer";
1058            break;
1059
1060        case __DRI_BUFFER_DEPTH:
1061        case __DRI_BUFFER_HIZ:
1062        case __DRI_BUFFER_DEPTH_STENCIL:
1063        case __DRI_BUFFER_STENCIL:
1064        case __DRI_BUFFER_ACCUM:
1065        default:
1066            fprintf(stderr,
1067                    "unhandled buffer attach event, attachment type %d\n",
1068                    buffers[i].attachment);
1069            return;
1070        }
1071
1072        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1073    }
1074
1075 }
1076
1077 void
1078 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1079 {
1080    struct brw_context *brw = context->driverPrivate;
1081    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1082
1083    /* Set this up front, so that in case our buffers get invalidated
1084     * while we're getting new buffers, we don't clobber the stamp and
1085     * thus ignore the invalidate. */
1086    drawable->lastStamp = drawable->dri2.stamp;
1087
1088    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1089       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1090
1091    if (screen->image.loader)
1092       intel_update_image_buffers(brw, drawable);
1093    else
1094       intel_update_dri2_buffers(brw, drawable);
1095
1096    driUpdateFramebufferSize(&brw->ctx, drawable);
1097 }
1098
1099 /**
1100  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1101  * state is required.
1102  */
1103 void
1104 intel_prepare_render(struct brw_context *brw)
1105 {
1106    struct gl_context *ctx = &brw->ctx;
1107    __DRIcontext *driContext = brw->driContext;
1108    __DRIdrawable *drawable;
1109
1110    drawable = driContext->driDrawablePriv;
1111    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1112       if (drawable->lastStamp != drawable->dri2.stamp)
1113          intel_update_renderbuffers(driContext, drawable);
1114       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1115    }
1116
1117    drawable = driContext->driReadablePriv;
1118    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1119       if (drawable->lastStamp != drawable->dri2.stamp)
1120          intel_update_renderbuffers(driContext, drawable);
1121       driContext->dri2.read_stamp = drawable->dri2.stamp;
1122    }
1123
1124    /* If we're currently rendering to the front buffer, the rendering
1125     * that will happen next will probably dirty the front buffer.  So
1126     * mark it as dirty here.
1127     */
1128    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
1129       brw->front_buffer_dirty = true;
1130
1131    /* Wait for the swapbuffers before the one we just emitted, so we
1132     * don't get too many swaps outstanding for apps that are GPU-heavy
1133     * but not CPU-heavy.
1134     *
1135     * We're using intelDRI2Flush (called from the loader before
1136     * swapbuffer) and glFlush (for front buffer rendering) as the
1137     * indicator that a frame is done and then throttle when we get
1138     * here as we prepare to render the next frame.  At this point for
1139     * round trips for swap/copy and getting new buffers are done and
1140     * we'll spend less time waiting on the GPU.
1141     *
1142     * Unfortunately, we don't have a handle to the batch containing
1143     * the swap, and getting our hands on that doesn't seem worth it,
1144     * so we just us the first batch we emitted after the last swap.
1145     */
1146    if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
1147       if (!brw->disable_throttling)
1148          drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
1149       drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
1150       brw->first_post_swapbuffers_batch = NULL;
1151       brw->need_throttle = false;
1152    }
1153 }
1154
1155 /**
1156  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1157  *
1158  * To determine which DRI buffers to request, examine the renderbuffers
1159  * attached to the drawable's framebuffer. Then request the buffers with
1160  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1161  *
1162  * This is called from intel_update_renderbuffers().
1163  *
1164  * \param drawable      Drawable whose buffers are queried.
1165  * \param buffers       [out] List of buffers returned by DRI2 query.
1166  * \param buffer_count  [out] Number of buffers returned.
1167  *
1168  * \see intel_update_renderbuffers()
1169  * \see DRI2GetBuffers()
1170  * \see DRI2GetBuffersWithFormat()
1171  */
1172 static void
1173 intel_query_dri2_buffers(struct brw_context *brw,
1174                          __DRIdrawable *drawable,
1175                          __DRIbuffer **buffers,
1176                          int *buffer_count)
1177 {
1178    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1179    struct gl_framebuffer *fb = drawable->driverPrivate;
1180    int i = 0;
1181    unsigned attachments[8];
1182
1183    struct intel_renderbuffer *front_rb;
1184    struct intel_renderbuffer *back_rb;
1185
1186    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1187    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1188
1189    memset(attachments, 0, sizeof(attachments));
1190    if ((brw_is_front_buffer_drawing(fb) ||
1191         brw_is_front_buffer_reading(fb) ||
1192         !back_rb) && front_rb) {
1193       /* If a fake front buffer is in use, then querying for
1194        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1195        * the real front buffer to the fake front buffer.  So before doing the
1196        * query, we need to make sure all the pending drawing has landed in the
1197        * real front buffer.
1198        */
1199       intel_batchbuffer_flush(brw);
1200       intel_flush_front(&brw->ctx);
1201
1202       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1203       attachments[i++] = intel_bits_per_pixel(front_rb);
1204    } else if (front_rb && brw->front_buffer_dirty) {
1205       /* We have pending front buffer rendering, but we aren't querying for a
1206        * front buffer.  If the front buffer we have is a fake front buffer,
1207        * the X server is going to throw it away when it processes the query.
1208        * So before doing the query, make sure all the pending drawing has
1209        * landed in the real front buffer.
1210        */
1211       intel_batchbuffer_flush(brw);
1212       intel_flush_front(&brw->ctx);
1213    }
1214
1215    if (back_rb) {
1216       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1217       attachments[i++] = intel_bits_per_pixel(back_rb);
1218    }
1219
1220    assert(i <= ARRAY_SIZE(attachments));
1221
1222    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1223                                                         &drawable->w,
1224                                                         &drawable->h,
1225                                                         attachments, i / 2,
1226                                                         buffer_count,
1227                                                         drawable->loaderPrivate);
1228 }
1229
1230 /**
1231  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1232  *
1233  * This is called from intel_update_renderbuffers().
1234  *
1235  * \par Note:
1236  *    DRI buffers whose attachment point is DRI2BufferStencil or
1237  *    DRI2BufferDepthStencil are handled as special cases.
1238  *
1239  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1240  *        that is passed to intel_region_alloc_for_handle().
1241  *
1242  * \see intel_update_renderbuffers()
1243  * \see intel_region_alloc_for_handle()
1244  */
1245 static void
1246 intel_process_dri2_buffer(struct brw_context *brw,
1247                           __DRIdrawable *drawable,
1248                           __DRIbuffer *buffer,
1249                           struct intel_renderbuffer *rb,
1250                           const char *buffer_name)
1251 {
1252    struct intel_region *region = NULL;
1253    struct gl_framebuffer *fb = drawable->driverPrivate;
1254
1255    if (!rb)
1256       return;
1257
1258    unsigned num_samples = rb->Base.Base.NumSamples;
1259
1260    /* We try to avoid closing and reopening the same BO name, because the first
1261     * use of a mapping of the buffer involves a bunch of page faulting which is
1262     * moderately expensive.
1263     */
1264    struct intel_mipmap_tree *last_mt;
1265    if (num_samples == 0)
1266       last_mt = rb->mt;
1267    else
1268       last_mt = rb->singlesample_mt;
1269
1270    if (last_mt && last_mt->region->name == buffer->name)
1271       return;
1272
1273    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1274       fprintf(stderr,
1275               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1276               buffer->name, buffer->attachment,
1277               buffer->cpp, buffer->pitch);
1278    }
1279
1280    intel_miptree_release(&rb->mt);
1281    region = intel_region_alloc_for_handle(brw->intelScreen,
1282                                           buffer->cpp,
1283                                           drawable->w,
1284                                           drawable->h,
1285                                           buffer->pitch,
1286                                           buffer->name,
1287                                           buffer_name);
1288    if (!region) {
1289       fprintf(stderr,
1290               "Failed to make region for returned DRI2 buffer "
1291               "(%dx%d, named %d).\n"
1292               "This is likely a bug in the X Server that will lead to a "
1293               "crash soon.\n",
1294               drawable->w, drawable->h, buffer->name);
1295       return;
1296    }
1297
1298    intel_update_winsys_renderbuffer_miptree(brw, rb, region);
1299
1300    if (brw_is_front_buffer_drawing(fb) &&
1301        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1302         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1303        rb->Base.Base.NumSamples > 1) {
1304       intel_renderbuffer_upsample(brw, rb);
1305    }
1306
1307    assert(rb->mt);
1308
1309    intel_region_release(&region);
1310 }
1311
1312 /**
1313  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1314  *
1315  * To determine which DRI buffers to request, examine the renderbuffers
1316  * attached to the drawable's framebuffer. Then request the buffers from
1317  * the image loader
1318  *
1319  * This is called from intel_update_renderbuffers().
1320  *
1321  * \param drawable      Drawable whose buffers are queried.
1322  * \param buffers       [out] List of buffers returned by DRI2 query.
1323  * \param buffer_count  [out] Number of buffers returned.
1324  *
1325  * \see intel_update_renderbuffers()
1326  */
1327
1328 static void
1329 intel_update_image_buffer(struct brw_context *intel,
1330                           __DRIdrawable *drawable,
1331                           struct intel_renderbuffer *rb,
1332                           __DRIimage *buffer,
1333                           enum __DRIimageBufferMask buffer_type)
1334 {
1335    struct intel_region *region = buffer->region;
1336    struct gl_framebuffer *fb = drawable->driverPrivate;
1337
1338    if (!rb || !region)
1339       return;
1340
1341    unsigned num_samples = rb->Base.Base.NumSamples;
1342
1343    /* Check and see if we're already bound to the right
1344     * buffer object
1345     */
1346    struct intel_mipmap_tree *last_mt;
1347    if (num_samples == 0)
1348       last_mt = rb->mt;
1349    else
1350       last_mt = rb->singlesample_mt;
1351
1352    if (last_mt && last_mt->region->bo == region->bo)
1353       return;
1354
1355    intel_update_winsys_renderbuffer_miptree(intel, rb, region);
1356
1357    if (brw_is_front_buffer_drawing(fb) &&
1358        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1359        rb->Base.Base.NumSamples > 1) {
1360       intel_renderbuffer_upsample(intel, rb);
1361    }
1362 }
1363
1364 static void
1365 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1366 {
1367    struct gl_framebuffer *fb = drawable->driverPrivate;
1368    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1369    struct intel_renderbuffer *front_rb;
1370    struct intel_renderbuffer *back_rb;
1371    struct __DRIimageList images;
1372    unsigned int format;
1373    uint32_t buffer_mask = 0;
1374
1375    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1376    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1377
1378    if (back_rb)
1379       format = intel_rb_format(back_rb);
1380    else if (front_rb)
1381       format = intel_rb_format(front_rb);
1382    else
1383       return;
1384
1385    if (front_rb && (brw_is_front_buffer_drawing(fb) ||
1386                     brw_is_front_buffer_reading(fb) || !back_rb)) {
1387       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1388    }
1389
1390    if (back_rb)
1391       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1392
1393    (*screen->image.loader->getBuffers) (drawable,
1394                                         driGLFormatToImageFormat(format),
1395                                         &drawable->dri2.stamp,
1396                                         drawable->loaderPrivate,
1397                                         buffer_mask,
1398                                         &images);
1399
1400    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1401       drawable->w = images.front->width;
1402       drawable->h = images.front->height;
1403       intel_update_image_buffer(brw,
1404                                 drawable,
1405                                 front_rb,
1406                                 images.front,
1407                                 __DRI_IMAGE_BUFFER_FRONT);
1408    }
1409    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1410       drawable->w = images.back->width;
1411       drawable->h = images.back->height;
1412       intel_update_image_buffer(brw,
1413                                 drawable,
1414                                 back_rb,
1415                                 images.back,
1416                                 __DRI_IMAGE_BUFFER_BACK);
1417    }
1418 }