src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43
  44 #include "vbo/vbo_context.h"
  45
  46 #include "drivers/common/driverfuncs.h"
  47 #include "drivers/common/meta.h"
  48 #include "utils.h"
  49
  50 #include "brw_context.h"
  51 #include "brw_defines.h"
  52 #include "brw_draw.h"
  53 #include "brw_state.h"
  54
  55 #include "intel_batchbuffer.h"
  56 #include "intel_buffer_objects.h"
  57 #include "intel_buffers.h"
  58 #include "intel_fbo.h"
  59 #include "intel_mipmap_tree.h"
  60 #include "intel_pixel.h"
  61 #include "intel_image.h"
  62 #include "intel_tex.h"
  63 #include "intel_tex_obj.h"
  64
  65 #include "swrast_setup/swrast_setup.h"
  66 #include "tnl/tnl.h"
  67 #include "tnl/t_pipeline.h"
  68 #include "util/ralloc.h"
  69
  70 /***************************************
  71  * Mesa's Driver Functions
  72  ***************************************/
  73
  74 static size_t
  75 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  76                              GLenum internalFormat, int samples[16])
  77 {
  78    struct brw_context *brw = brw_context(ctx);
  79
  80    (void) target;
  81
  82    switch (brw->gen) {
  83    case 8:
  84       samples[0] = 8;
  85       samples[1] = 4;
  86       samples[2] = 2;
  87       return 3;
  88
  89    case 7:
  90       samples[0] = 8;
  91       samples[1] = 4;
  92       return 2;
  93
  94    case 6:
  95       samples[0] = 4;
  96       return 1;
  97
  98    default:
  99       samples[0] = 1;
 100       return 1;
 101    }
 102 }
 103
 104 const char *const brw_vendor_string = "Intel Open Source Technology Center";
 105
 106 const char *
 107 brw_get_renderer_string(unsigned deviceID)
 108 {
 109    const char *chipset;
 110    static char buffer[128];
 111
 112    switch (deviceID) {
 113 #undef CHIPSET
 114 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
 115 #include "pci_ids/i965_pci_ids.h"
 116    default:
 117       chipset = "Unknown Intel Chipset";
 118       break;
 119    }
 120
 121    (void) driGetRendererString(buffer, chipset, 0);
 122    return buffer;
 123 }
 124
 125 static const GLubyte *
 126 intelGetString(struct gl_context * ctx, GLenum name)
 127 {
 128    const struct brw_context *const brw = brw_context(ctx);
 129
 130    switch (name) {
 131    case GL_VENDOR:
 132       return (GLubyte *) brw_vendor_string;
 133
 134    case GL_RENDERER:
 135       return
 136          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 137
 138    default:
 139       return NULL;
 140    }
 141 }
 142
 143 static void
 144 intel_viewport(struct gl_context *ctx)
 145 {
 146    struct brw_context *brw = brw_context(ctx);
 147    __DRIcontext *driContext = brw->driContext;
 148
 149    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 150       dri2InvalidateDrawable(driContext->driDrawablePriv);
 151       dri2InvalidateDrawable(driContext->driReadablePriv);
 152    }
 153 }
 154
 155 static void
 156 intelInvalidateState(struct gl_context * ctx, GLuint new_state)
 157 {
 158    struct brw_context *brw = brw_context(ctx);
 159
 160    if (ctx->swrast_context)
 161       _swrast_InvalidateState(ctx, new_state);
 162    _vbo_InvalidateState(ctx, new_state);
 163
 164    brw->NewGLState |= new_state;
 165 }
 166
 167 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 168
 169 static void
 170 intel_flush_front(struct gl_context *ctx)
 171 {
 172    struct brw_context *brw = brw_context(ctx);
 173    __DRIcontext *driContext = brw->driContext;
 174    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 175    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 176
 177    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 178       if (flushFront(screen) && driDrawable &&
 179           driDrawable->loaderPrivate) {
 180
 181          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 182           *
 183           * This potentially resolves both front and back buffer. It
 184           * is unnecessary to resolve the back, but harms nothing except
 185           * performance. And no one cares about front-buffer render
 186           * performance.
 187           */
 188          intel_resolve_for_dri2_flush(brw, driDrawable);
 189          intel_batchbuffer_flush(brw);
 190
 191          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 192
 193          /* We set the dirty bit in intel_prepare_render() if we're
 194           * front buffer rendering once we get there.
 195           */
 196          brw->front_buffer_dirty = false;
 197       }
 198    }
 199 }
 200
 201 static void
 202 intel_glFlush(struct gl_context *ctx)
 203 {
 204    struct brw_context *brw = brw_context(ctx);
 205
 206    intel_batchbuffer_flush(brw);
 207    intel_flush_front(ctx);
 208    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
 209       brw->need_throttle = true;
 210 }
 211
 212 void
 213 intelFinish(struct gl_context * ctx)
 214 {
 215    struct brw_context *brw = brw_context(ctx);
 216
 217    intel_glFlush(ctx);
 218
 219    if (brw->batch.last_bo)
 220       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 221 }
 222
 223 static void
 224 brw_init_driver_functions(struct brw_context *brw,
 225                           struct dd_function_table *functions)
 226 {
 227    _mesa_init_driver_functions(functions);
 228
 229    /* GLX uses DRI2 invalidate events to handle window resizing.
 230     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 231     * which doesn't provide a mechanism for snooping the event queues.
 232     *
 233     * So EGL still relies on viewport hacks to handle window resizing.
 234     * This should go away with DRI3000.
 235     */
 236    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 237       functions->Viewport = intel_viewport;
 238
 239    functions->Flush = intel_glFlush;
 240    functions->Finish = intelFinish;
 241    functions->GetString = intelGetString;
 242    functions->UpdateState = intelInvalidateState;
 243
 244    intelInitTextureFuncs(functions);
 245    intelInitTextureImageFuncs(functions);
 246    intelInitTextureSubImageFuncs(functions);
 247    intelInitTextureCopyImageFuncs(functions);
 248    intelInitClearFuncs(functions);
 249    intelInitBufferFuncs(functions);
 250    intelInitPixelFuncs(functions);
 251    intelInitBufferObjectFuncs(functions);
 252    intel_init_syncobj_functions(functions);
 253    brw_init_object_purgeable_functions(functions);
 254
 255    brwInitFragProgFuncs( functions );
 256    brw_init_common_queryobj_functions(functions);
 257    if (brw->gen >= 6)
 258       gen6_init_queryobj_functions(functions);
 259    else
 260       gen4_init_queryobj_functions(functions);
 261
 262    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 263
 264    functions->NewTransformFeedback = brw_new_transform_feedback;
 265    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 266    functions->GetTransformFeedbackVertexCount =
 267       brw_get_transform_feedback_vertex_count;
 268    if (brw->gen >= 7) {
 269       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 270       functions->EndTransformFeedback = gen7_end_transform_feedback;
 271       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 272       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 273    } else {
 274       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 275       functions->EndTransformFeedback = brw_end_transform_feedback;
 276    }
 277
 278    if (brw->gen >= 6)
 279       functions->GetSamplePosition = gen6_get_sample_position;
 280 }
 281
 282 static void
 283 brw_initialize_context_constants(struct brw_context *brw)
 284 {
 285    struct gl_context *ctx = &brw->ctx;
 286
 287    unsigned max_samplers =
 288       brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
 289
 290    ctx->Const.QueryCounterBits.Timestamp = 36;
 291
 292    ctx->Const.StripTextureBorder = true;
 293
 294    ctx->Const.MaxDualSourceDrawBuffers = 1;
 295    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 296    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
 297    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 298    ctx->Const.MaxTextureUnits =
 299       MIN2(ctx->Const.MaxTextureCoordUnits,
 300            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 301    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
 302    if (brw->gen >= 7)
 303       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
 304    else
 305       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
 306    if (_mesa_extension_override_enables.ARB_compute_shader) {
 307       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
 308       ctx->Const.MaxUniformBufferBindings += 12;
 309    } else {
 310       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
 311    }
 312    ctx->Const.MaxCombinedTextureImageUnits =
 313       ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
 314       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
 315       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
 316       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
 317
 318    ctx->Const.MaxTextureLevels = 14; /* 8192 */
 319    if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
 320       ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
 321    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
 322    ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
 323    ctx->Const.MaxTextureMbytes = 1536;
 324
 325    if (brw->gen >= 7)
 326       ctx->Const.MaxArrayTextureLayers = 2048;
 327    else
 328       ctx->Const.MaxArrayTextureLayers = 512;
 329
 330    ctx->Const.MaxTextureRectSize = 1 << 12;
 331
 332    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 333
 334    ctx->Const.MaxRenderbufferSize = 8192;
 335
 336    /* Hardware only supports a limited number of transform feedback buffers.
 337     * So we need to override the Mesa default (which is based only on software
 338     * limits).
 339     */
 340    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 341
 342    /* On Gen6, in the worst case, we use up one binding table entry per
 343     * transform feedback component (see comments above the definition of
 344     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
 345     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
 346     * BRW_MAX_SOL_BINDINGS.
 347     *
 348     * In "separate components" mode, we need to divide this value by
 349     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
 350     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
 351     */
 352    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
 353    ctx->Const.MaxTransformFeedbackSeparateComponents =
 354       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 355
 356    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
 357
 358    int max_samples;
 359    const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
 360    const int clamp_max_samples =
 361       driQueryOptioni(&brw->optionCache, "clamp_max_samples");
 362
 363    if (clamp_max_samples < 0) {
 364       max_samples = msaa_modes[0];
 365    } else {
 366       /* Select the largest supported MSAA mode that does not exceed
 367        * clamp_max_samples.
 368        */
 369       max_samples = 0;
 370       for (int i = 0; msaa_modes[i] != 0; ++i) {
 371          if (msaa_modes[i] <= clamp_max_samples) {
 372             max_samples = msaa_modes[i];
 373             break;
 374          }
 375       }
 376    }
 377
 378    ctx->Const.MaxSamples = max_samples;
 379    ctx->Const.MaxColorTextureSamples = max_samples;
 380    ctx->Const.MaxDepthTextureSamples = max_samples;
 381    ctx->Const.MaxIntegerSamples = max_samples;
 382
 383    if (brw->gen >= 7)
 384       ctx->Const.MaxProgramTextureGatherComponents = 4;
 385    else if (brw->gen == 6)
 386       ctx->Const.MaxProgramTextureGatherComponents = 1;
 387
 388    ctx->Const.MinLineWidth = 1.0;
 389    ctx->Const.MinLineWidthAA = 1.0;
 390    ctx->Const.MaxLineWidth = 5.0;
 391    ctx->Const.MaxLineWidthAA = 5.0;
 392    ctx->Const.LineWidthGranularity = 0.5;
 393
 394    ctx->Const.MinPointSize = 1.0;
 395    ctx->Const.MinPointSizeAA = 1.0;
 396    ctx->Const.MaxPointSize = 255.0;
 397    ctx->Const.MaxPointSizeAA = 255.0;
 398    ctx->Const.PointSizeGranularity = 1.0;
 399
 400    if (brw->gen >= 5 || brw->is_g4x)
 401       ctx->Const.MaxClipPlanes = 8;
 402
 403    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
 404    ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
 405    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
 406    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
 407    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
 408    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
 409    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
 410    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
 411    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
 412    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
 413    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
 414    ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
 415       MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
 416            ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
 417
 418    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
 419    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
 420    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
 421    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
 422    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
 423    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
 424    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
 425    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
 426    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
 427       MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
 428            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
 429
 430    /* Fragment shaders use real, 32-bit twos-complement integers for all
 431     * integer types.
 432     */
 433    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
 434    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
 435    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
 436    ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 437    ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 438
 439    if (brw->gen >= 7) {
 440       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 441       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 442       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 443       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 444       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
 445       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
 446       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
 447       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
 448       ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
 449    }
 450
 451    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
 452     * but we're not sure how it's actually done for vertex order,
 453     * that affect provoking vertex decision. Always use last vertex
 454     * convention for quad primitive which works as expected for now.
 455     */
 456    if (brw->gen >= 6)
 457       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 458
 459    ctx->Const.NativeIntegers = true;
 460    ctx->Const.UniformBooleanTrue = 1;
 461
 462    /* From the gen4 PRM, volume 4 page 127:
 463     *
 464     *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
 465     *      the base address of the first element of the surface, computed in
 466     *      software by adding the surface base address to the byte offset of
 467     *      the element in the buffer."
 468     *
 469     * However, unaligned accesses are slower, so enforce buffer alignment.
 470     */
 471    ctx->Const.UniformBufferOffsetAlignment = 16;
 472    ctx->Const.TextureBufferOffsetAlignment = 16;
 473
 474    if (brw->gen >= 6) {
 475       ctx->Const.MaxVarying = 32;
 476       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
 477       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
 478       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
 479       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
 480    }
 481
 482    /* We want the GLSL compiler to emit code that uses condition codes */
 483    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 484       ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
 485       ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
 486       ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
 487       ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
 488       ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
 489       ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput =
 490          (i == MESA_SHADER_FRAGMENT);
 491       ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
 492          (i == MESA_SHADER_FRAGMENT);
 493       ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
 494       ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
 495    }
 496
 497    ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
 498    ctx->ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
 499
 500    /* ARB_viewport_array */
 501    if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
 502       ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
 503       ctx->Const.ViewportSubpixelBits = 0;
 504
 505       /* Cast to float before negating becuase MaxViewportWidth is unsigned.
 506        */
 507       ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
 508       ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
 509    }
 510
 511    /* ARB_gpu_shader5 */
 512    if (brw->gen >= 7)
 513       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
 514 }
 515
 516 /**
 517  * Process driconf (drirc) options, setting appropriate context flags.
 518  *
 519  * intelInitExtensions still pokes at optionCache directly, in order to
 520  * avoid advertising various extensions.  No flags are set, so it makes
 521  * sense to continue doing that there.
 522  */
 523 static void
 524 brw_process_driconf_options(struct brw_context *brw)
 525 {
 526    struct gl_context *ctx = &brw->ctx;
 527
 528    driOptionCache *options = &brw->optionCache;
 529    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 530                        brw->driContext->driScreenPriv->myNum, "i965");
 531
 532    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 533    switch (bo_reuse_mode) {
 534    case DRI_CONF_BO_REUSE_DISABLED:
 535       break;
 536    case DRI_CONF_BO_REUSE_ALL:
 537       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 538       break;
 539    }
 540
 541    if (!driQueryOptionb(options, "hiz")) {
 542        brw->has_hiz = false;
 543        /* On gen6, you can only do separate stencil with HIZ. */
 544        if (brw->gen == 6)
 545           brw->has_separate_stencil = false;
 546    }
 547
 548    if (driQueryOptionb(options, "always_flush_batch")) {
 549       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 550       brw->always_flush_batch = true;
 551    }
 552
 553    if (driQueryOptionb(options, "always_flush_cache")) {
 554       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 555       brw->always_flush_cache = true;
 556    }
 557
 558    if (driQueryOptionb(options, "disable_throttling")) {
 559       fprintf(stderr, "disabling flush throttling\n");
 560       brw->disable_throttling = true;
 561    }
 562
 563    brw->disable_derivative_optimization =
 564       driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
 565
 566    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 567
 568    ctx->Const.ForceGLSLExtensionsWarn =
 569       driQueryOptionb(options, "force_glsl_extensions_warn");
 570
 571    ctx->Const.DisableGLSLLineContinuations =
 572       driQueryOptionb(options, "disable_glsl_line_continuations");
 573
 574    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 575       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 576 }
 577
 578 GLboolean
 579 brwCreateContext(gl_api api,
 580                  const struct gl_config *mesaVis,
 581                  __DRIcontext *driContextPriv,
 582                  unsigned major_version,
 583                  unsigned minor_version,
 584                  uint32_t flags,
 585                  bool notify_reset,
 586                  unsigned *dri_ctx_error,
 587                  void *sharedContextPrivate)
 588 {
 589    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 590    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 591    struct intel_screen *screen = sPriv->driverPrivate;
 592    const struct brw_device_info *devinfo = screen->devinfo;
 593    struct dd_function_table functions;
 594
 595    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 596     * provides us with context reset notifications.
 597     */
 598    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 599       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 600
 601    if (screen->has_context_reset_notification)
 602       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 603
 604    if (flags & ~allowed_flags) {
 605       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 606       return false;
 607    }
 608
 609    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 610    if (!brw) {
 611       fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
 612       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 613       return false;
 614    }
 615
 616    driContextPriv->driverPrivate = brw;
 617    brw->driContext = driContextPriv;
 618    brw->intelScreen = screen;
 619    brw->bufmgr = screen->bufmgr;
 620
 621    brw->gen = devinfo->gen;
 622    brw->gt = devinfo->gt;
 623    brw->is_g4x = devinfo->is_g4x;
 624    brw->is_baytrail = devinfo->is_baytrail;
 625    brw->is_haswell = devinfo->is_haswell;
 626    brw->is_cherryview = devinfo->is_cherryview;
 627    brw->has_llc = devinfo->has_llc;
 628    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 629    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 630    brw->has_pln = devinfo->has_pln;
 631    brw->has_compr4 = devinfo->has_compr4;
 632    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 633    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 634    brw->needs_unlit_centroid_workaround =
 635       devinfo->needs_unlit_centroid_workaround;
 636
 637    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 638    brw->has_swizzling = screen->hw_has_swizzling;
 639
 640    brw->vs.base.stage = MESA_SHADER_VERTEX;
 641    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 642    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 643    if (brw->gen >= 8) {
 644       gen8_init_vtable_surface_functions(brw);
 645       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 646    } else if (brw->gen >= 7) {
 647       gen7_init_vtable_surface_functions(brw);
 648       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 649    } else {
 650       gen4_init_vtable_surface_functions(brw);
 651       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 652    }
 653
 654    brw_init_driver_functions(brw, &functions);
 655
 656    if (notify_reset)
 657       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 658
 659    struct gl_context *ctx = &brw->ctx;
 660
 661    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 662       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 663       fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
 664       intelDestroyContext(driContextPriv);
 665       return false;
 666    }
 667
 668    driContextSetFlags(ctx, flags);
 669
 670    /* Initialize the software rasterizer and helper modules.
 671     *
 672     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 673     * software fallbacks (which we have to support on legacy GL to do weird
 674     * glDrawPixels(), glBitmap(), and other functions).
 675     */
 676    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 677       _swrast_CreateContext(ctx);
 678    }
 679
 680    _vbo_CreateContext(ctx);
 681    if (ctx->swrast_context) {
 682       _tnl_CreateContext(ctx);
 683       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 684       _swsetup_CreateContext(ctx);
 685
 686       /* Configure swrast to match hardware characteristics: */
 687       _swrast_allow_pixel_fog(ctx, false);
 688       _swrast_allow_vertex_fog(ctx, true);
 689    }
 690
 691    _mesa_meta_init(ctx);
 692
 693    brw_process_driconf_options(brw);
 694    brw_process_intel_debug_variable(brw);
 695    brw_initialize_context_constants(brw);
 696
 697    ctx->Const.ResetStrategy = notify_reset
 698       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 699
 700    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 701    _mesa_init_point(ctx);
 702
 703    intel_fbo_init(brw);
 704
 705    intel_batchbuffer_init(brw);
 706
 707    if (brw->gen >= 6) {
 708       /* Create a new hardware context.  Using a hardware context means that
 709        * our GPU state will be saved/restored on context switch, allowing us
 710        * to assume that the GPU is in the same state we left it in.
 711        *
 712        * This is required for transform feedback buffer offsets, query objects,
 713        * and also allows us to reduce how much state we have to emit.
 714        */
 715       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 716
 717       if (!brw->hw_ctx) {
 718          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 719          intelDestroyContext(driContextPriv);
 720          return false;
 721       }
 722    }
 723
 724    brw_init_state(brw);
 725
 726    intelInitExtensions(ctx);
 727
 728    brw_init_surface_formats(brw);
 729
 730    brw->max_vs_threads = devinfo->max_vs_threads;
 731    brw->max_gs_threads = devinfo->max_gs_threads;
 732    brw->max_wm_threads = devinfo->max_wm_threads;
 733    brw->urb.size = devinfo->urb.size;
 734    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 735    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 736    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 737
 738    /* Estimate the size of the mappable aperture into the GTT.  There's an
 739     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 740     * It turns out it's basically always 256MB, though some ancient hardware
 741     * was smaller.
 742     */
 743    uint32_t gtt_size = 256 * 1024 * 1024;
 744
 745    /* We don't want to map two objects such that a memcpy between them would
 746     * just fault one mapping in and then the other over and over forever.  So
 747     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 748     * taken up by things like the framebuffer and the ringbuffer and such, so
 749     * be more conservative.
 750     */
 751    brw->max_gtt_map_object_size = gtt_size / 4;
 752
 753    if (brw->gen == 6)
 754       brw->urb.gen6_gs_previously_active = false;
 755
 756    brw->prim_restart.in_progress = false;
 757    brw->prim_restart.enable_cut_index = false;
 758    brw->gs.enabled = false;
 759
 760    ctx->VertexProgram._MaintainTnlProgram = true;
 761    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 762
 763    brw_draw_init( brw );
 764
 765    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 766       /* Turn on some extra GL_ARB_debug_output generation. */
 767       brw->perf_debug = true;
 768    }
 769
 770    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 771       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 772
 773    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 774       brw_init_shader_time(brw);
 775
 776    _mesa_compute_version(ctx);
 777
 778    _mesa_initialize_dispatch_tables(ctx);
 779    _mesa_initialize_vbo_vtxfmt(ctx);
 780
 781    if (ctx->Extensions.AMD_performance_monitor) {
 782       brw_init_performance_monitors(brw);
 783    }
 784
 785    vbo_use_buffer_objects(ctx);
 786    vbo_always_unmap_buffers(ctx);
 787
 788    return true;
 789 }
 790
     /**
      * Destroy the driver context stored in \p driContextPriv->driverPrivate.
      *
      * Emits the optional final AUB dump and shader-time report, then tears
      * down meta, driver state, the draw module, the hardware context, the
      * software fallback modules (when present), the batchbuffer, the option
      * cache and the core Mesa context data, and finally frees the brw_context
      * itself.  On return driContextPriv->driverPrivate is NULL.
      *
      * \param driContextPriv  DRI context whose driver-private data is freed.
      */
 791 void
 792 intelDestroyContext(__DRIcontext * driContextPriv)
 793 {
 794    struct brw_context *brw =
 795       (struct brw_context *) driContextPriv->driverPrivate;
 797
 798    assert(brw); /* should never be null */
 799    if (!brw)
 800       return;
 801
        /* Derive the gl_context pointer only after the NULL guard above, so the
         * guard is not preceded by a use of brw.
         */
        struct gl_context *ctx = &brw->ctx;

 802    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 803    if (INTEL_DEBUG & DEBUG_AUB) {
 804       intel_batchbuffer_flush(brw);
 805       aub_dump_bmp(&brw->ctx);
 806    }
 807
 808    _mesa_meta_free(&brw->ctx);
 809
 810    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 811       /* Force a report. */
 812       brw->shader_time.report_time = 0;
 813
 814       brw_collect_and_report_shader_time(brw);
 815       brw_destroy_shader_time(brw);
 816    }
 817
 818    brw_destroy_state(brw);
 819    brw_draw_destroy(brw);
 820
 821    drm_intel_bo_unreference(brw->curbe.curbe_bo);
 822
 823    drm_intel_gem_context_destroy(brw->hw_ctx);
 824
        /* The swsetup/tnl/swrast modules are only torn down when a swrast
         * context exists; vbo is always torn down, between the two groups.
         */
 825    if (ctx->swrast_context) {
 826       _swsetup_DestroyContext(&brw->ctx);
 827       _tnl_DestroyContext(&brw->ctx);
 828    }
 829    _vbo_DestroyContext(&brw->ctx);
 830
 831    if (ctx->swrast_context)
 832       _swrast_DestroyContext(&brw->ctx);
 833
 834    intel_batchbuffer_free(brw);
 835
        /* Drop the batch kept for swap throttling (see intel_prepare_render()). */
 836    drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
 837    brw->first_post_swapbuffers_batch = NULL;
 838
 839    driDestroyOptionCache(&brw->optionCache);
 840
 841    /* free the Mesa context */
 842    _mesa_free_context_data(&brw->ctx);
 843
 844    ralloc_free(brw);
 845    driContextPriv->driverPrivate = NULL;
 846 }
 847
     /**
      * Unbind the current context by making no Mesa context current.
      *
      * \param driContextPriv  Context being unbound; not consulted here.
      * \return always true.
      */
 848 GLboolean
 849 intelUnbindContext(__DRIcontext * driContextPriv)
 850 {
        (void) driContextPriv;

        /* Clear the current context and dispatch table. */
        _mesa_make_current(NULL, NULL, NULL);
        return true;
 855 }
 856
 857 /**
 858  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 859  * on window system framebuffers.
 860  *
 861  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 862  * your renderbuffer can do sRGB encode, and you can flip a switch that does
 863  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 864  * for a visual where you're guaranteed to be capable, but it turns out that
 865  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 866  * incapable ones, because there's no difference between the two in resources
 867  * used.  Applications thus get built that accidentally rely on the default
 868  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 869  * great...
 870  *
 871  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 872  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 873  * So they removed the enable knob and made it "if the renderbuffer is sRGB
 874  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 875  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 876  * and get no sRGB encode (assuming that both kinds of visual are available).
 877  * Thus our choice to support sRGB by default on our visuals for desktop would
 878  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 879  *
 880  * Unfortunately, renderbuffer setup happens before a context is created.  So
 881  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 882  * context (without an sRGB visual, though we don't have sRGB visuals exposed
 883  * yet), we go turn that back off before anyone finds out.
 884  */
 885 static void
 886 intel_gles3_srgb_workaround(struct brw_context *brw,
 887                             struct gl_framebuffer *fb)
 888 {
 889    struct gl_context *ctx = &brw->ctx;
 890
        /* Desktop GL contexts keep the sRGB-capable setup untouched. */
        if (_mesa_is_desktop_gl(ctx))
           return;

        /* Nothing to undo when the framebuffer is not marked sRGB capable. */
        if (!fb->Visual.sRGBCapable)
           return;

        /* Once sRGB-capable visuals are exposed for GLES, this unconditional
         * downgrade will have to honor them instead.
         */
        fb->Visual.sRGBCapable = false;

        /* Switch every attached BGRA8 sRGB renderbuffer to the UNORM format. */
        for (int idx = 0; idx < BUFFER_COUNT; idx++) {
           if (!fb->Attachment[idx].Renderbuffer)
              continue;
           if (fb->Attachment[idx].Renderbuffer->Format != MESA_FORMAT_B8G8R8A8_SRGB)
              continue;

           fb->Attachment[idx].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
        }
 904 }
 905
     /**
      * Make \p driContextPriv current with the given draw and read drawables,
      * or make no context current when \p driContextPriv is NULL.
      *
      * The previously current context is flushed first if it differs from the
      * one being bound.
      *
      * \param driContextPriv  Context to bind, or NULL to unbind.
      * \param driDrawPriv     Drawable to render to.
      * \param driReadPriv     Drawable to read from.
      * \return always true.
      *
      * NOTE(review): when exactly one of driDrawPriv / driReadPriv is NULL the
      * else branch below dereferences both; presumably callers pass both or
      * neither — confirm.
      */
 906 GLboolean
 907 intelMakeCurrent(__DRIcontext * driContextPriv,
 908                  __DRIdrawable * driDrawPriv,
 909                  __DRIdrawable * driReadPriv)
 910 {
 911    struct brw_context *brw;
 912    GET_CURRENT_CONTEXT(curCtx);
 913
 914    if (driContextPriv)
 915       brw = (struct brw_context *) driContextPriv->driverPrivate;
 916    else
 917       brw = NULL;
 918
 919    /* According to the glXMakeCurrent() man page: "Pending commands to
 920     * the previous context, if any, are flushed before it is released."
 921     * But only flush if we're actually changing contexts.
 922     */
 923    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
 924       _mesa_flush(curCtx);
 925    }
 926
 927    if (driContextPriv) {
 928       struct gl_context *ctx = &brw->ctx;
 929       struct gl_framebuffer *fb, *readFb;
 930
 931       if (driDrawPriv == NULL && driReadPriv == NULL) {
             /* No drawables at all: bind the shared incomplete framebuffer. */
 932          fb = _mesa_get_incomplete_framebuffer();
 933          readFb = _mesa_get_incomplete_framebuffer();
 934       } else {
 935          fb = driDrawPriv->driverPrivate;
 936          readFb = driReadPriv->driverPrivate;
             /* Leave the context's cached stamps one behind the drawables' so
              * the stamp comparison in intel_prepare_render() sees a mismatch.
              */
 937          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
 938          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
 939       }
 940
 941       /* The sRGB workaround changes the renderbuffer's format. We must change
 942        * the format before the renderbuffer's miptree gets allocated, otherwise
 943        * the formats of the renderbuffer and its miptree will differ.
 944        */
 945       intel_gles3_srgb_workaround(brw, fb);
 946       intel_gles3_srgb_workaround(brw, readFb);
 947
 948       /* If the context viewport hasn't been initialized, force a call out to
 949        * the loader to get buffers so we have a drawable size for the initial
 950        * viewport. */
 951       if (!brw->ctx.ViewportInitialized)
 952          intel_prepare_render(brw);
 953
 954       _mesa_make_current(ctx, fb, readFb);
 955    } else {
 956       _mesa_make_current(NULL, NULL, NULL);
 957    }
 958
 959    return true;
 960 }
 961
     /**
      * Resolve the back-left and front-left color buffers of \p drawable.
      *
      * For each of the two renderbuffers that exists and has a miptree, a
      * multisampled buffer is downsampled and any other buffer gets a color
      * resolve.  Does nothing when brw->gen is below 6.
      *
      * \param brw       Context performing the resolves.
      * \param drawable  Drawable whose framebuffer is examined.
      */
 962 void
 963 intel_resolve_for_dri2_flush(struct brw_context *brw,
 964                              __DRIdrawable *drawable)
 965 {
        /* Usually only the back buffer needs work, but the front buffer does
         * too if the user has rendered into it.
         */
        static const gl_buffer_index candidates[] = {
           BUFFER_BACK_LEFT,
           BUFFER_FRONT_LEFT,
        };

        /* MSAA and fast color clear are not supported before gen6, so there
         * is never anything to resolve there.
         */
        if (brw->gen < 6)
           return;

        struct gl_framebuffer *fb = drawable->driverPrivate;

        for (unsigned n = 0; n < sizeof(candidates) / sizeof(candidates[0]); n++) {
           struct intel_renderbuffer *rb =
              intel_get_renderbuffer(fb, candidates[n]);

           if (rb != NULL && rb->mt != NULL) {
              if (rb->mt->num_samples > 1)
                 intel_renderbuffer_downsample(brw, rb);
              else
                 intel_miptree_resolve_color(brw, rb->mt);
           }
        }
 993 }
 994
     /**
      * Return the size of one pixel of \p rb in bits: the byte size of the
      * renderbuffer's format multiplied by eight.
      */
 995 static unsigned
 996 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
 997 {
        enum { bits_per_byte = 8 };

        return bits_per_byte * _mesa_get_format_bytes(intel_rb_format(rb));
 999 }
1000
     /* Forward declarations for static helpers that are defined further down in
      * this file but used by intel_update_dri2_buffers() and
      * intel_update_renderbuffers() before their definitions.
      */
1001 static void
1002 intel_query_dri2_buffers(struct brw_context *brw,
1003                          __DRIdrawable *drawable,
1004                          __DRIbuffer **buffers,
1005                          int *count);
1006
1007 static void
1008 intel_process_dri2_buffer(struct brw_context *brw,
1009                           __DRIdrawable *drawable,
1010                           __DRIbuffer *buffer,
1011                           struct intel_renderbuffer *rb,
1012                           const char *buffer_name);
1013
1014 static void
1015 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1016
     /**
      * Refresh the renderbuffers of \p drawable through the DRI2 loader.
      *
      * Queries the loader for the drawable's buffers and hands every front,
      * fake-front and back buffer to intel_process_dri2_buffer().  Any other
      * attachment type is reported on stderr and ends the update.
      *
      * \param brw       Context on whose behalf the buffers are updated.
      * \param drawable  Drawable whose buffers are refreshed.
      */
1017 static void
1018 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1019 {
        struct gl_framebuffer *fb = drawable->driverPrivate;
        __DRIbuffer *buffers = NULL;
        int count;

        /* Record the stamp before asking for buffers: if an invalidate arrives
         * while the query is in flight, it must not be overwritten and lost.
         */
        drawable->lastStamp = drawable->dri2.stamp;

        if (unlikely(INTEL_DEBUG & DEBUG_DRI))
           fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

        intel_query_dri2_buffers(brw, drawable, &buffers, &count);
        if (!buffers)
           return;

        for (int n = 0; n < count; n++) {
           __DRIbuffer *buf = &buffers[n];
           struct intel_renderbuffer *rb;
           const char *region_name;

           switch (buf->attachment) {
           case __DRI_BUFFER_FRONT_LEFT:
           case __DRI_BUFFER_FAKE_FRONT_LEFT:
              /* Real and fake front buffers both land in BUFFER_FRONT_LEFT;
               * only the debug name differs.
               */
              rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
              if (buf->attachment == __DRI_BUFFER_FRONT_LEFT)
                 region_name = "dri2 front buffer";
              else
                 region_name = "dri2 fake front buffer";
              break;

           case __DRI_BUFFER_BACK_LEFT:
              rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
              region_name = "dri2 back buffer";
              break;

           default:
              /* Depth, HiZ, stencil, accum and anything unknown end up here. */
              fprintf(stderr,
                      "unhandled buffer attach event, attachment type %d\n",
                      buf->attachment);
              return;
           }

           intel_process_dri2_buffer(brw, drawable, buf, rb, region_name);
        }
1071 }
1072
     /**
      * Refresh the renderbuffers of \p drawable and then update the
      * framebuffer size.
      *
      * Uses the image loader when the screen has one and the DRI2 loader
      * otherwise.
      *
      * \param context   DRI context whose driverPrivate is the brw_context.
      * \param drawable  Drawable whose buffers are refreshed.
      */
1073 void
1074 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1075 {
        struct brw_context *brw = context->driverPrivate;
        __DRIscreen *dri_screen = brw->intelScreen->driScrnPriv;

        /* Record the stamp before fetching buffers: an invalidate that arrives
         * during the fetch must not be clobbered and thereby ignored.
         */
        drawable->lastStamp = drawable->dri2.stamp;

        if (unlikely(INTEL_DEBUG & DEBUG_DRI))
           fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

        /* Without an image loader, fall back to the DRI2 buffer path. */
        if (!dri_screen->image.loader)
           intel_update_dri2_buffers(brw, drawable);
        else
           intel_update_image_buffers(brw, drawable);

        driUpdateFramebufferSize(&brw->ctx, drawable);
1093 }
1094
1095 /**
1096  * intel_prepare_render should be called anywhere that current read/drawbuffer
1097  * state is required.
      *
      * It refreshes the renderbuffers of the context's draw and read drawables
      * when their stamps have moved on, marks the front buffer dirty when
      * front-buffer drawing is active, and throttles on the first batch emitted
      * after the last swap when throttling has been requested.
      *
      * \param brw  Context to prepare.
1098  */
1099 void
1100 intel_prepare_render(struct brw_context *brw)
1101 {
1102    struct gl_context *ctx = &brw->ctx;
1103    __DRIcontext *driContext = brw->driContext;
1104    __DRIdrawable *drawable;
1105
        /* Draw drawable: refresh the buffers only if the drawable's stamp
         * differs from both the context's cached stamp and the drawable's own
         * lastStamp, then cache the current stamp in the context.
         */
1106    drawable = driContext->driDrawablePriv;
1107    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1108       if (drawable->lastStamp != drawable->dri2.stamp)
1109          intel_update_renderbuffers(driContext, drawable);
1110       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1111    }
1112
        /* Same procedure for the read drawable, using read_stamp. */
1113    drawable = driContext->driReadablePriv;
1114    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1115       if (drawable->lastStamp != drawable->dri2.stamp)
1116          intel_update_renderbuffers(driContext, drawable);
1117       driContext->dri2.read_stamp = drawable->dri2.stamp;
1118    }
1119
1120    /* If we're currently rendering to the front buffer, the rendering
1121     * that will happen next will probably dirty the front buffer.  So
1122     * mark it as dirty here.
1123     */
1124    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
1125       brw->front_buffer_dirty = true;
1126
1127    /* Wait for the swapbuffers before the one we just emitted, so we
1128     * don't get too many swaps outstanding for apps that are GPU-heavy
1129     * but not CPU-heavy.
1130     *
1131     * We're using intelDRI2Flush (called from the loader before
1132     * swapbuffer) and glFlush (for front buffer rendering) as the
1133     * indicator that a frame is done and then throttle when we get
1134     * here as we prepare to render the next frame.  At this point the
1135     * round trips for swap/copy and getting new buffers are done and
1136     * we'll spend less time waiting on the GPU.
1137     *
1138     * Unfortunately, we don't have a handle to the batch containing
1139     * the swap, and getting our hands on that doesn't seem worth it,
1140     * so we just use the first batch we emitted after the last swap.
1141     */
1142    if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
           /* The batch reference is dropped and the request cleared even when
            * throttling is disabled; only the wait itself is skipped.
            */
1143       if (!brw->disable_throttling)
1144          drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
1145       drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
1146       brw->first_post_swapbuffers_batch = NULL;
1147       brw->need_throttle = false;
1148    }
1149 }
1150
1151 /**
1152  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1153  *
1154  * To determine which DRI buffers to request, examine the renderbuffers
1155  * attached to the drawable's framebuffer. Then request the buffers through
1156  * the DRI2 loader's getBuffersWithFormat() hook.
1157  *
1158  * This is called from intel_update_dri2_buffers().
1159  *
      * \param brw           Context; flushed before the query when front buffer
      *                      contents may be affected by it.
1160  * \param drawable      Drawable whose buffers are queried.
1161  * \param buffers       [out] List of buffers returned by DRI2 query.
1162  * \param buffer_count  [out] Number of buffers returned.
1163  *
1164  * \see intel_update_renderbuffers()
1165  * \see DRI2GetBuffers()
1166  * \see DRI2GetBuffersWithFormat()
1167  */
1168 static void
1169 intel_query_dri2_buffers(struct brw_context *brw,
1170                          __DRIdrawable *drawable,
1171                          __DRIbuffer **buffers,
1172                          int *buffer_count)
1173 {
1174    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1175    struct gl_framebuffer *fb = drawable->driverPrivate;
        /* i counts filled entries of attachments[], which holds
         * (attachment, bits-per-pixel) pairs; at most two pairs are written.
         */
1176    int i = 0;
1177    unsigned attachments[8];
1178
1179    struct intel_renderbuffer *front_rb;
1180    struct intel_renderbuffer *back_rb;
1181
1182    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1183    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1184
1185    memset(attachments, 0, sizeof(attachments));
        /* Request the front buffer when it is drawn to, read from, or is the
         * only color buffer available.
         */
1186    if ((brw_is_front_buffer_drawing(fb) ||
1187         brw_is_front_buffer_reading(fb) ||
1188         !back_rb) && front_rb) {
1189       /* If a fake front buffer is in use, then querying for
1190        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1191        * the real front buffer to the fake front buffer.  So before doing the
1192        * query, we need to make sure all the pending drawing has landed in the
1193        * real front buffer.
1194        */
1195       intel_batchbuffer_flush(brw);
1196       intel_flush_front(&brw->ctx);
1197
1198       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1199       attachments[i++] = intel_bits_per_pixel(front_rb);
1200    } else if (front_rb && brw->front_buffer_dirty) {
1201       /* We have pending front buffer rendering, but we aren't querying for a
1202        * front buffer.  If the front buffer we have is a fake front buffer,
1203        * the X server is going to throw it away when it processes the query.
1204        * So before doing the query, make sure all the pending drawing has
1205        * landed in the real front buffer.
1206        */
1207       intel_batchbuffer_flush(brw);
1208       intel_flush_front(&brw->ctx);
1209    }
1210
1211    if (back_rb) {
1212       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1213       attachments[i++] = intel_bits_per_pixel(back_rb);
1214    }
1215
1216    assert(i <= ARRAY_SIZE(attachments));
1217
        /* The loader is given the number of pairs (i / 2), and also writes the
         * drawable's width and height through the pointers passed here.
         */
1218    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1219                                                         &drawable->w,
1220                                                         &drawable->h,
1221                                                         attachments, i / 2,
1222                                                         buffer_count,
1223                                                         drawable->loaderPrivate);
1224 }
1225
1226 /**
1227  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1228  *
1229  * This is called from intel_update_dri2_buffers() for each buffer returned
      * by the DRI2 query.
1230  *
1231  * \par Note:
1232  *    DRI buffers whose attachment point is DRI2BufferStencil or
1233  *    DRI2BufferDepthStencil are handled as special cases.
      *    NOTE(review): no such special-casing is visible in this function; the
      *    note above may be stale — confirm.
1234  *
      * \param brw       Context; supplies the buffer manager used to open the BO.
      * \param drawable  Drawable the buffer belongs to; supplies width/height.
      * \param buffer    DRI2 buffer description (name, attachment, cpp, pitch).
      * \param rb        Renderbuffer to update; nothing is done when NULL.
1235  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1236  *        that is passed to drm_intel_bo_gem_create_from_name().
1237  *
1238  * \see intel_update_renderbuffers()
1239  */
1240 static void
1241 intel_process_dri2_buffer(struct brw_context *brw,
1242                           __DRIdrawable *drawable,
1243                           __DRIbuffer *buffer,
1244                           struct intel_renderbuffer *rb,
1245                           const char *buffer_name)
1246 {
1247    struct gl_framebuffer *fb = drawable->driverPrivate;
1248    drm_intel_bo *bo;
1249
1250    if (!rb)
1251       return;
1252
1253    unsigned num_samples = rb->Base.Base.NumSamples;
1254
1255    /* We try to avoid closing and reopening the same BO name, because the first
1256     * use of a mapping of the buffer involves a bunch of page faulting which is
1257     * moderately expensive.
1258     */
        /* With a sample count of zero the comparison uses rb->mt; otherwise it
         * uses rb->singlesample_mt.
         */
1259    struct intel_mipmap_tree *last_mt;
1260    if (num_samples == 0)
1261       last_mt = rb->mt;
1262    else
1263       last_mt = rb->singlesample_mt;
1264
        /* old_name stays 0 when there is no previous miptree. */
1265    uint32_t old_name = 0;
1266    if (last_mt) {
1267        /* The bo already has a name because the miptree was created by a
1268         * previous call to intel_process_dri2_buffer(). If a bo already has a
1269         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1270         * create a new name.
1271         */
1272       drm_intel_bo_flink(last_mt->bo, &old_name);
1273    }
1274
        /* Same BO name as before: the renderbuffer is already up to date. */
1275    if (old_name == buffer->name)
1276       return;
1277
1278    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1279       fprintf(stderr,
1280               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1281               buffer->name, buffer->attachment,
1282               buffer->cpp, buffer->pitch);
1283    }
1284
        /* The old miptree is released before the new BO is opened, so the
         * failure return below happens after rb->mt has already been released.
         */
1285    intel_miptree_release(&rb->mt);
1286    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1287                                           buffer->name);
1288    if (!bo) {
1289       fprintf(stderr,
1290               "Failed to open BO for returned DRI2 buffer "
1291               "(%dx%d, %s, named %d).\n"
1292               "This is likely a bug in the X Server that will lead to a "
1293               "crash soon.\n",
1294               drawable->w, drawable->h, buffer_name, buffer->name);
1295       return;
1296    }
1297
1298    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1299                                             drawable->w, drawable->h,
1300                                             buffer->pitch);
1301
        /* A multisampled front buffer that is being drawn to is upsampled after
         * the new buffer has been attached.
         */
1302    if (brw_is_front_buffer_drawing(fb) &&
1303        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1304         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1305        rb->Base.Base.NumSamples > 1) {
1306       intel_renderbuffer_upsample(brw, rb);
1307    }
1308
1309    assert(rb->mt);
1310
        /* Drop the reference obtained from drm_intel_bo_gem_create_from_name();
         * presumably the miptree holds its own reference by now — confirm.
         */
1311    drm_intel_bo_unreference(bo);
1312 }
1313
1314 /**
      * \brief Bind an image returned by the DRI image loader to a renderbuffer.
      *
      * Does nothing when there is no renderbuffer, when the image has no
      * buffer object, or when the renderbuffer's current window-system miptree
      * already uses that buffer object.  Otherwise the image's buffer object,
      * size and pitch are handed to intel_update_winsys_renderbuffer_miptree(),
      * and a multisampled front buffer that is being drawn to is upsampled.
      *
      * This is called from intel_update_image_buffers().
      *
      * \param intel        Context on whose behalf the renderbuffer is updated.
      * \param drawable     Drawable whose framebuffer is checked for front
      *                     buffer drawing.
      * \param rb           Renderbuffer to update; may be NULL.
      * \param buffer       Image supplying the buffer object, size and pitch.
      * \param buffer_type  Which buffer the image stands for (front or back).
      *
      * \see intel_update_image_buffers()
      */
1330 static void
1331 intel_update_image_buffer(struct brw_context *intel,
1332                           __DRIdrawable *drawable,
1333                           struct intel_renderbuffer *rb,
1334                           __DRIimage *buffer,
1335                           enum __DRIimageBufferMask buffer_type)
1336 {
1337    struct gl_framebuffer *fb = drawable->driverPrivate;
        struct intel_mipmap_tree *bound_mt;

        if (rb == NULL || buffer->bo == NULL)
           return;

        /* Pick the miptree that wraps the window-system buffer: rb->mt when
         * the sample count is zero, rb->singlesample_mt otherwise.
         */
        bound_mt = (rb->Base.Base.NumSamples == 0) ? rb->mt
                                                   : rb->singlesample_mt;

        /* Already bound to the right buffer object: nothing to do. */
        if (bound_mt != NULL && bound_mt->bo == buffer->bo)
           return;

        intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                                 buffer->width, buffer->height,
                                                 buffer->pitch);

        /* Only a multisampled front buffer under front-buffer drawing needs
         * to be upsampled from the freshly attached image.
         */
        if (brw_is_front_buffer_drawing(fb)) {
           if (buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
               rb->Base.Base.NumSamples > 1) {
              intel_renderbuffer_upsample(intel, rb);
           }
        }
1365 }
1366
     /**
      * \brief Refresh a drawable's renderbuffers through the DRI image loader.
      *
      * Works out which of the front and back buffers are needed from the
      * renderbuffers attached to the drawable's framebuffer, requests them
      * with the image loader's getBuffers() hook and binds every returned
      * image with intel_update_image_buffer().  The drawable's width and
      * height are taken from the returned images.
      *
      * Nothing is updated when the framebuffer has neither a front nor a back
      * renderbuffer, or when the loader reports failure.
      *
      * \param brw       Context on whose behalf the buffers are updated.
      * \param drawable  Drawable whose buffers are queried.
      */
1367 static void
1368 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1369 {
1370    struct gl_framebuffer *fb = drawable->driverPrivate;
1371    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1372    struct intel_renderbuffer *front_rb;
1373    struct intel_renderbuffer *back_rb;
        /* Zeroed so image_mask reads as empty if the loader leaves it untouched. */
        struct __DRIimageList images = { 0 };
1375    unsigned int format;
1376    uint32_t buffer_mask = 0;
        int ret;
1377
1378    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1379    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1380
        /* The requested image format follows the back buffer when there is
         * one, otherwise the front buffer.
         */
1381    if (back_rb)
1382       format = intel_rb_format(back_rb);
1383    else if (front_rb)
1384       format = intel_rb_format(front_rb);
1385    else
1386       return;
1387
        /* Ask for the front buffer when it is drawn to, read from, or is the
         * only color buffer available.
         */
1388    if (front_rb && (brw_is_front_buffer_drawing(fb) ||
1389                     brw_is_front_buffer_reading(fb) || !back_rb)) {
1390       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1391    }
1392
1393    if (back_rb)
1394       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1395
        ret = (*screen->image.loader->getBuffers) (drawable,
                                                   driGLFormatToImageFormat(format),
                                                   &drawable->dri2.stamp,
                                                   drawable->loaderPrivate,
                                                   buffer_mask,
                                                   &images);

        /* The loader hook returns 0 on failure; in that case `images` holds
         * nothing usable, so leave the renderbuffers as they are.
         */
        if (!ret)
           return;

1403    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1404       drawable->w = images.front->width;
1405       drawable->h = images.front->height;
1406       intel_update_image_buffer(brw,
1407                                 drawable,
1408                                 front_rb,
1409                                 images.front,
1410                                 __DRI_IMAGE_BUFFER_FRONT);
1411    }
1412    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1413       drawable->w = images.back->width;
1414       drawable->h = images.back->height;
1415       intel_update_image_buffer(brw,
1416                                 drawable,
1417                                 back_rb,
1418                                 images.back,
1419                                 __DRI_IMAGE_BUFFER_BACK);
1420    }
1421 }