i965: Move pre-draw resolve buffers to dd::UpdateState
src/mesa/drivers/dri/i965/brw_context.c
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

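/* Report the MSAA sample counts supported for a format, highest first:
 * 8x/4x/2x on Gen8, 8x/4x on Gen7, 4x on Gen6, and single-sampled only on
 * earlier hardware.
 */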
static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      samples[0] = 1;
      return 1;
   }
}

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

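   /* pci_ids/i965_pci_ids.h expands the CHIPSET() X-macro once per known
    * PCI ID, generating a case label and chipset name for each device.
    */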
   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      dri2InvalidateDrawable(driContext->driDrawablePriv);
      dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

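/**
 * The dd_function_table::UpdateState hook.  Besides invalidating derived
 * state, this now performs the pre-draw buffer resolves (depth HiZ, and
 * the depth/color of each enabled texture) that previously happened at
 * draw time, per the commit title above.
 */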
static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      intel_miptree_resolve_color(brw, tex_obj->mt);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   _mesa_lock_context_textures(ctx);
}

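/* Pick whichever loader is in use: prefer the image loader's
 * flushFrontBuffer hook, falling back to the DRI2 loader's.
 */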
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both the front and back buffers.
          * Resolving the back buffer is unnecessary, but it harms nothing
          * except performance, and no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);
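   /* Front-buffer rendering has no SwapBuffers call to throttle on, so
    * request throttling from the next intel_prepare_render() instead,
    * presumably to keep glFlush-driven apps from racing ahead of the GPU.
    */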
   if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->need_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

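/**
 * Initialize the ctx->Const limits, most of which vary with the hardware
 * generation stored in brw->gen.
 */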
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
   if (brw->gen >= 7)
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
   else
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
   if (_mesa_extension_override_enables.ARB_compute_shader) {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
      ctx->Const.MaxUniformBufferBindings += 12;
   } else {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
   }
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxTextureMbytes = 1536;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;

   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   ctx->Const.MaxLineWidth = 5.0;
   ctx->Const.MaxLineWidthAA = 5.0;
   ctx->Const.LineWidthGranularity = 0.5;

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   if (brw->gen >= 7) {
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
   }

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking-vertex decision.  Always use the last-vertex
    * convention for quad primitives; it works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.UniformBooleanTrue = 1;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;
   ctx->Const.TextureBufferOffsetAlignment = 16;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
      ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
         (i == MESA_SHADER_FRAGMENT);
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
         (i == MESA_SHADER_FRAGMENT);
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
      ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
   }

   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
   ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;

   /* ARB_viewport_array */
   if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HiZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->disable_derivative_optimization =
      driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);
   brw_process_intel_debug_variable(brw);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gen6_gs_previously_active = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   assert(brw); /* should never be null */
   if (!brw)
      return;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
   brw->first_post_swapbuffers_batch = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL && driReadPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         readFb = driReadPriv->driverPrivate;
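         /* Set the context's stamps to one less than the drawables', so
          * the first intel_prepare_render() sees them as out of date and
          * fetches fresh buffers.
          */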
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format.  We must
       * change the format before the renderbuffer's miptree gets allocated,
       * otherwise the formats of the renderbuffer and its miptree will
       * differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport.
       */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled.  However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
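      /* Single-sampled miptrees may still carry unresolved fast-clear
       * color, hence the resolve; multisampled ones need a downsample
       * blit into their singlesample miptree instead.
       */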
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;

   /* Wait for the swapbuffers before the one we just emitted, so we
    * don't get too many swaps outstanding for apps that are GPU-heavy
    * but not CPU-heavy.
    *
    * We're using intelDRI2Flush (called from the loader before
    * swapbuffer) and glFlush (for front buffer rendering) as the
    * indicator that a frame is done and then throttle when we get
    * here as we prepare to render the next frame.  At this point, the
    * round trips for swap/copy and getting new buffers are done, and
    * we'll spend less time waiting on the GPU.
    *
    * Unfortunately, we don't have a handle to the batch containing
    * the swap, and getting our hands on that doesn't seem worth it,
    * so we just use the first batch we emitted after the last swap.
    */
   if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
      if (!brw->disable_throttling)
         drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
      drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
      brw->first_post_swapbuffers_batch = NULL;
      brw->need_throttle = false;
   }
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((brw_is_front_buffer_drawing(fb) ||
        brw_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

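   /* attachments[] holds interleaved (attachment token, bits-per-pixel)
    * pairs, so i/2 is the number of attachments being requested.
    */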
   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   intel_miptree_release(&rb->mt);
   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (brw_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by the loader.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (brw_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

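   /* Mirror the DRI2 path above: request a front image whenever we might
    * draw to or read from the front buffer, or when there is no back
    * buffer at all.
    */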
   if (front_rb && (brw_is_front_buffer_drawing(fb) ||
                    brw_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}