/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_regions.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "glsl/ralloc.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

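/**
 * Driver hook used for GL_SAMPLES internalformat queries: fills samples[]
 * with the supported MSAA sample counts in descending order and returns the
 * number of entries written.
 */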
static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      samples[0] = 1;
      return 1;
   }
}

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intelGetString(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

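/**
 * Viewport hook installed only when DRI2 invalidate events are unavailable
 * (see brw_init_driver_functions): invalidates the window-system drawables
 * so their buffers are re-queried, which handles window resizes.
 */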
static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      dri2InvalidateDrawable(driContext->driDrawablePriv);
      dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intelInvalidateState(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;
}

#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

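/**
 * Flush pending front-buffer rendering out to the window system, but only
 * when the front buffer has actually been dirtied and we are drawing to a
 * winsys framebuffer.
 */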
static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both the front and back buffers.
          * Resolving the back buffer is unnecessary, but it harms nothing
          * except performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

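/**
 * glFlush hook: submit the current batch to the kernel and push any pending
 * front-buffer rendering out to the window system.
 */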
static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);
   if (brw->is_front_buffer_rendering)
      brw->need_throttle = true;
}

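/**
 * glFinish hook: flush everything, then wait for the most recently submitted
 * batch to complete.
 */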
void
intelFinish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

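/**
 * Populate Mesa's dd_function_table with this driver's hooks, starting from
 * the software defaults and overriding the entry points we accelerate.
 */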
static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intelFinish;
   functions->GetString = intelGetString;
   functions->UpdateState = intelInvalidateState;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

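/**
 * Fill in ctx->Const with the limits this hardware generation actually
 * supports: texture units and sizes, MSAA modes, transform feedback limits,
 * program resource limits, and GLSL compiler options.
 */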
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
   if (brw->gen >= 7)
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
   else
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
   if (getenv("INTEL_COMPUTE_SHADER")) {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
      ctx->Const.MaxUniformBufferBindings += 12;
   } else {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
   }
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 12;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
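    *
    * A worked illustration (using the values defined in brw_context.h at the
    * time of writing, so treat the numbers as illustrative rather than
    * normative): with BRW_MAX_SOL_BINDINGS = 64 and BRW_MAX_SOL_BUFFERS = 4,
    * interleaved mode advertises 64 components total while separate mode
    * advertises 64 / 4 = 16 components per buffer, so neither mode can
    * consume more than 64 binding table entries.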
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }
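
   /* A worked example (assuming, as the break above relies on, that
    * msaa_modes is sorted in descending order and zero-terminated): with
    * msaa_modes = {8, 4, 0} and clamp_max_samples = 6, the loop skips 8
    * and selects 4.
    */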

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;

   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   ctx->Const.MaxLineWidth = 5.0;
   ctx->Const.MaxLineWidthAA = 5.0;
   ctx->Const.LineWidthGranularity = 0.5;

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   if (brw->gen >= 7) {
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
   }

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking vertex decision. Always use the last-vertex
    * convention for quad primitives; it works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.UniformBooleanTrue = 1;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;
   ctx->Const.TextureBufferOffsetAlignment = 16;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
      ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
      ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
      ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
      ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
      ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;

      ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
         (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
         (i == MESA_SHADER_FRAGMENT);
      ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
   }

   ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
   ctx->ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;

   /* ARB_viewport_array */
   if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions. No flags are set, so it makes
 * sense to continue doing that there.
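 *
 * As a user-facing sketch (the option names below match the driQueryOption*
 * calls in this function, but the exact drirc schema is defined by the
 * loader, so treat this as illustrative), a ~/.drirc entry selecting some
 * of these options might look like:
 *
 *    <driconf>
 *       <device screen="0" driver="i965">
 *          <application name="Default">
 *             <option name="bo_reuse" value="1" />
 *             <option name="always_flush_batch" value="true" />
 *             <option name="shader_precompile" value="true" />
 *          </application>
 *       </device>
 *    </driconf>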
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->disable_derivative_optimization =
      driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");
}

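/**
 * DRI driver entry point for context creation: allocates and initializes a
 * brw_context, sets up the per-generation vtables and driver hooks, creates
 * the Mesa context, and (on Gen6+) a kernel hardware context.
 */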
GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;
   struct gl_config visual;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      printf("%s: failed to alloc context\n", __FUNCTION__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil && brw->gen < 8;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      gen7_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      gen7_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      gen4_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (mesaVis == NULL) {
      memset(&visual, 0, sizeof visual);
      mesaVis = &visual;
   }

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      printf("%s: failed to init mesa context\n", __FUNCTION__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);
   brw_process_intel_debug_variable(brw);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state. It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_batchbuffer_init(brw);

   brw_init_state(brw);

   intelInitExtensions(ctx);

   intel_fbo_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context. Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   brw_init_surface_formats(brw);

   if (brw->is_g4x || brw->gen >= 5) {
      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
   } else {
      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
   }

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT. There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever. So
    * we would need to divide the GTT size by 2. Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gen6_gs_previously_active = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;

   if (brw->gen < 6) {
      brw->curbe.last_buf = calloc(1, 4096);
      brw->curbe.next_buf = calloc(1, 4096);
   }

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   brw_fs_alloc_reg_sets(brw);
   brw_vec4_alloc_reg_set(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   return true;
}

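/**
 * DRI driver entry point for context destruction: tears down driver state in
 * roughly the reverse order of brwCreateContext.
 */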
void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   assert(brw); /* should never be null */
   if (!brw)
      return;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   drm_intel_bo_unreference(brw->vs.base.const_bo);
   drm_intel_bo_unreference(brw->wm.base.const_bo);

   free(brw->curbe.last_buf);
   free(brw->curbe.next_buf);

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
   brw->first_post_swapbuffers_batch = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}


GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it. You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used. Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode". Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created. So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL && driReadPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

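/**
 * Downsample or color-resolve the winsys buffers before they are handed to
 * the window system, so that it never sees our internal MSAA or fast-clear
 * representations.
 */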
void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

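/**
 * DRI2 (non-image-loader) path of intel_update_renderbuffers(): query DRI2
 * for the drawable's current buffers and attach each returned buffer to the
 * matching renderbuffer.
 */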
static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }
}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer. So
    * mark it as dirty here.
    */
   if (brw->is_front_buffer_rendering)
      brw->front_buffer_dirty = true;

   /* Wait for the swapbuffers before the one we just emitted, so we
    * don't get too many swaps outstanding for apps that are GPU-heavy
    * but not CPU-heavy.
    *
    * We're using intelDRI2Flush (called from the loader before
    * swapbuffer) and glFlush (for front buffer rendering) as the
    * indicator that a frame is done and then throttle when we get
    * here as we prepare to render the next frame. At this point, the
    * round trips for swap/copy and getting new buffers are done, so
    * we'll spend less time waiting on the GPU.
    *
    * Unfortunately, we don't have a handle to the batch containing
    * the swap, and getting our hands on that doesn't seem worth it,
    * so we just use the first batch we emitted after the last swap.
    */
   if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
      if (!brw->disable_throttling)
         drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
      drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
      brw->first_post_swapbuffers_batch = NULL;
      brw->need_throttle = false;
   }
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((brw->is_front_buffer_rendering ||
        brw->is_front_buffer_reading ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer. So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer. If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to intel_region_alloc_for_handle().
 *
 * \see intel_update_renderbuffers()
 * \see intel_region_alloc_for_handle()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct intel_region *region = NULL;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   if (num_samples == 0) {
      if (rb->mt &&
          rb->mt->region &&
          rb->mt->region->name == buffer->name)
         return;
   } else {
      if (rb->singlesample_mt &&
          rb->singlesample_mt->region &&
          rb->singlesample_mt->region->name == buffer->name)
         return;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   intel_miptree_release(&rb->mt);
   region = intel_region_alloc_for_handle(brw->intelScreen,
                                          buffer->cpp,
                                          drawable->w,
                                          drawable->h,
                                          buffer->pitch,
                                          buffer->name,
                                          buffer_name);
   if (!region) {
      fprintf(stderr,
              "Failed to make region for returned DRI2 buffer "
              "(%dx%d, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, region);

   if (brw->is_front_buffer_rendering &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   intel_region_release(&region);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

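/**
 * Helper for intel_update_image_buffers(): rebind one renderbuffer to the
 * region backing the __DRIimage the loader returned, skipping the work when
 * the renderbuffer already points at the same BO.
 */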
static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct intel_region *region = buffer->region;

   if (!rb || !region)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   if (num_samples == 0) {
      if (rb->mt &&
          rb->mt->region &&
          rb->mt->region->bo == region->bo)
         return;
   } else {
      if (rb->singlesample_mt &&
          rb->singlesample_mt->region &&
          rb->singlesample_mt->region->bo == region->bo)
         return;
   }

   intel_update_winsys_renderbuffer_miptree(intel, rb, region);

   if (intel->is_front_buffer_rendering &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if ((brw->is_front_buffer_rendering || brw->is_front_buffer_reading || !back_rb) && front_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}