src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43 #include "main/texobj.h"
  44
  45 #include "vbo/vbo_context.h"
  46
  47 #include "drivers/common/driverfuncs.h"
  48 #include "drivers/common/meta.h"
  49 #include "utils.h"
  50
  51 #include "brw_context.h"
  52 #include "brw_defines.h"
  53 #include "brw_draw.h"
  54 #include "brw_state.h"
  55
  56 #include "intel_batchbuffer.h"
  57 #include "intel_buffer_objects.h"
  58 #include "intel_buffers.h"
  59 #include "intel_fbo.h"
  60 #include "intel_mipmap_tree.h"
  61 #include "intel_pixel.h"
  62 #include "intel_image.h"
  63 #include "intel_tex.h"
  64 #include "intel_tex_obj.h"
  65
  66 #include "swrast_setup/swrast_setup.h"
  67 #include "tnl/tnl.h"
  68 #include "tnl/t_pipeline.h"
  69 #include "util/ralloc.h"
  70
  71 /***************************************
  72  * Mesa's Driver Functions
  73  ***************************************/
  74
  75 static size_t
  76 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  77                              GLenum internalFormat, int samples[16])
  78 {
  79    struct brw_context *brw = brw_context(ctx);
  80
  81    (void) target;
  82
  83    switch (brw->gen) {
  84    case 8:
  85       samples[0] = 8;
  86       samples[1] = 4;
  87       samples[2] = 2;
  88       return 3;
  89
  90    case 7:
  91       samples[0] = 8;
  92       samples[1] = 4;
  93       return 2;
  94
  95    case 6:
  96       samples[0] = 4;
  97       return 1;
  98
  99    default:
 100       samples[0] = 1;
 101       return 1;
 102    }
 103 }
 104
/** Vendor name; intel_get_string() returns it for GL_VENDOR. */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
 106
/**
 * Build the renderer string for a PCI device ID.
 *
 * The chipset name is found by expanding every CHIPSET() entry of
 * pci_ids/i965_pci_ids.h into a switch case; IDs without an entry get
 * "Unknown Intel Chipset".  The name is then passed to
 * driGetRendererString() to produce the final text.
 *
 * \param deviceID  PCI device ID to look up.
 * \return pointer to a function-static buffer.  The buffer is shared by all
 *         calls, so a later call presumably overwrites an earlier result;
 *         callers must not free it.
 */
const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
   /* Redefine CHIPSET so that including the ID table emits one
    * "case id: chipset = str; break;" per known device.
    */
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   /* The return value of driGetRendererString() is deliberately ignored. */
   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}
 125
 126 static const GLubyte *
 127 intel_get_string(struct gl_context * ctx, GLenum name)
 128 {
 129    const struct brw_context *const brw = brw_context(ctx);
 130
 131    switch (name) {
 132    case GL_VENDOR:
 133       return (GLubyte *) brw_vendor_string;
 134
 135    case GL_RENDERER:
 136       return
 137          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 138
 139    default:
 140       return NULL;
 141    }
 142 }
 143
 144 static void
 145 intel_viewport(struct gl_context *ctx)
 146 {
 147    struct brw_context *brw = brw_context(ctx);
 148    __DRIcontext *driContext = brw->driContext;
 149
 150    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 151       dri2InvalidateDrawable(driContext->driDrawablePriv);
 152       dri2InvalidateDrawable(driContext->driReadablePriv);
 153    }
 154 }
 155
 156 static void
 157 intel_update_state(struct gl_context * ctx, GLuint new_state)
 158 {
 159    struct brw_context *brw = brw_context(ctx);
 160    struct intel_texture_object *tex_obj;
 161    struct intel_renderbuffer *depth_irb;
 162
 163    if (ctx->swrast_context)
 164       _swrast_InvalidateState(ctx, new_state);
 165    _vbo_InvalidateState(ctx, new_state);
 166
 167    brw->NewGLState |= new_state;
 168
 169    _mesa_unlock_context_textures(ctx);
 170
 171    /* Resolve the depth buffer's HiZ buffer. */
 172    depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
 173    if (depth_irb)
 174       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 175
 176    /* Resolve depth buffer and render cache of each enabled texture. */
 177    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
 178    for (int i = 0; i <= maxEnabledUnit; i++) {
 179       if (!ctx->Texture.Unit[i]._Current)
 180          continue;
 181       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
 182       if (!tex_obj || !tex_obj->mt)
 183          continue;
 184       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
 185       intel_miptree_resolve_color(brw, tex_obj->mt);
 186       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 187    }
 188
 189    _mesa_lock_context_textures(ctx);
 190 }
 191
/* Selects the loader's flushFrontBuffer callback: the image loader's when
 * the screen has an image loader, otherwise the DRI2 loader's.  The
 * dri2.loader pointer is dereferenced without a NULL check.
 */
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 193
 194 static void
 195 intel_flush_front(struct gl_context *ctx)
 196 {
 197    struct brw_context *brw = brw_context(ctx);
 198    __DRIcontext *driContext = brw->driContext;
 199    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 200    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 201
 202    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 203       if (flushFront(screen) && driDrawable &&
 204           driDrawable->loaderPrivate) {
 205
 206          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 207           *
 208           * This potentially resolves both front and back buffer. It
 209           * is unnecessary to resolve the back, but harms nothing except
 210           * performance. And no one cares about front-buffer render
 211           * performance.
 212           */
 213          intel_resolve_for_dri2_flush(brw, driDrawable);
 214          intel_batchbuffer_flush(brw);
 215
 216          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 217
 218          /* We set the dirty bit in intel_prepare_render() if we're
 219           * front buffer rendering once we get there.
 220           */
 221          brw->front_buffer_dirty = false;
 222       }
 223    }
 224 }
 225
 226 static void
 227 intel_glFlush(struct gl_context *ctx)
 228 {
 229    struct brw_context *brw = brw_context(ctx);
 230
 231    intel_batchbuffer_flush(brw);
 232    intel_flush_front(ctx);
 233    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
 234       brw->need_throttle = true;
 235 }
 236
 237 static void
 238 intel_finish(struct gl_context * ctx)
 239 {
 240    struct brw_context *brw = brw_context(ctx);
 241
 242    intel_glFlush(ctx);
 243
 244    if (brw->batch.last_bo)
 245       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 246 }
 247
/**
 * Fill in the driver function table for a new context.
 *
 * Starts from _mesa_init_driver_functions(), then installs the hooks
 * defined in this file and calls the per-area init helpers.  Several hooks
 * are chosen according to brw->gen.
 *
 * \param brw        context being created; only brw->driContext and
 *                   brw->gen are read here.
 * \param functions  table to populate.
 */
static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   /* Per-area helpers; each receives the table and fills in its own hooks. */
   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   /* Common query-object hooks first, then the generation-specific ones. */
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   /* Only the Gen7+ path installs Pause/Resume hooks; older generations
    * keep whatever _mesa_init_driver_functions() left in those slots.
    */
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}
 307
/**
 * Set the implementation limits in brw->ctx.Const for this hardware.
 *
 * Most values are chosen from brw->gen and the is_haswell / is_cherryview /
 * is_g4x flags.  The MSAA limit also honours the "clamp_max_samples"
 * driconf option, so brw->optionCache must already be populated when this
 * runs.  Also calls gen6_set_sample_maps().
 *
 * Statement order matters in places: some values are derived from fields
 * assigned a few lines earlier (e.g. MaxTextureUnits, the MaxEnvParams
 * clamps).
 */
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* Gen8+ and Haswell get the full BRW_MAX_TEX_UNIT samplers per stage;
    * everything else is limited to 16.
    */
   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
   /* Geometry-stage samplers are only exposed on Gen6+. */
   if (brw->gen >= 6)
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
   else
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
   /* Compute-stage limits are only raised when ARB_compute_shader has been
    * force-enabled through the extension override.
    */
   if (_mesa_extension_override_enables.ARB_compute_shader) {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
      ctx->Const.MaxUniformBufferBindings += 12;
   } else {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
   }
   /* Combined limit is the sum of the per-stage limits set above. */
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   /* Never exceed core Mesa's own level limit. */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxTextureMbytes = 1536;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   /* Pick the maximum sample count.  msaa_modes is walked as a
    * zero-terminated list; taking the first entry as the unclamped maximum
    * and stopping at the first match both presume it is sorted largest
    * first -- confirm against intel_supported_msaa_modes().
    */
   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   /* A negative option value means "do not clamp". */
   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   /* Below Gen6 the value is left at whatever it already was. */
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 9 || brw->is_cherryview) {
      ctx->Const.MaxLineWidth = 40.0;
      ctx->Const.MaxLineWidthAA = 40.0;
      ctx->Const.LineWidthGranularity = 0.125;
   } else if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.875;
      ctx->Const.MaxLineWidthAA = 7.875;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   /* Clamp the existing MaxEnvParams to the native parameter limit. */
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   /* Same clamp as for the vertex stage. */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   /* Atomic counter limits are only set on Gen7+. */
   if (brw->gen >= 7) {
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
      /* NOTE(review): four stages are given BRW_MAX_ABO above but the
       * combined limit counts three -- presumably compute is excluded on
       * purpose; confirm.
       */
      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
   }

   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
    * but we're not sure how it's actually done for vertex order,
    * that affect provoking vertex decision. Always use last vertex
    * convention for quad primitive which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to match what the
    * CMP instruction returns.
    *
    * The Sandybridge BSpec's description of the CMP instruction matches that
    * of the Ivybridge PRM. (The description in the Sandybridge PRM seems
    * to have not been updated from Ironlake). Its CMP instruction behaves like
    * Ivybridge and newer.
    */
   if (brw->gen >= 6)
      ctx->Const.UniformBooleanTrue = ~0;
   else
      ctx->Const.UniformBooleanTrue = 1;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;
   ctx->Const.TextureBufferOffsetAlignment = 16;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      /* If-nesting depth is capped at 16 before Gen6, unlimited after. */
      ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
      ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
      /* These two are enabled for the fragment stage only. */
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
         (i == MESA_SHADER_FRAGMENT);
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
         (i == MESA_SHADER_FRAGMENT);
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
      ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
   }

   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
   ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;

   /* ARB_viewport_array */
   if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
}
 585
 586 /**
 587  * Process driconf (drirc) options, setting appropriate context flags.
 588  *
 589  * intelInitExtensions still pokes at optionCache directly, in order to
 590  * avoid advertising various extensions.  No flags are set, so it makes
 591  * sense to continue doing that there.
 592  */
 593 static void
 594 brw_process_driconf_options(struct brw_context *brw)
 595 {
 596    struct gl_context *ctx = &brw->ctx;
 597
 598    driOptionCache *options = &brw->optionCache;
 599    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 600                        brw->driContext->driScreenPriv->myNum, "i965");
 601
 602    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 603    switch (bo_reuse_mode) {
 604    case DRI_CONF_BO_REUSE_DISABLED:
 605       break;
 606    case DRI_CONF_BO_REUSE_ALL:
 607       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 608       break;
 609    }
 610
 611    if (!driQueryOptionb(options, "hiz")) {
 612        brw->has_hiz = false;
 613        /* On gen6, you can only do separate stencil with HIZ. */
 614        if (brw->gen == 6)
 615           brw->has_separate_stencil = false;
 616    }
 617
 618    if (driQueryOptionb(options, "always_flush_batch")) {
 619       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 620       brw->always_flush_batch = true;
 621    }
 622
 623    if (driQueryOptionb(options, "always_flush_cache")) {
 624       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 625       brw->always_flush_cache = true;
 626    }
 627
 628    if (driQueryOptionb(options, "disable_throttling")) {
 629       fprintf(stderr, "disabling flush throttling\n");
 630       brw->disable_throttling = true;
 631    }
 632
 633    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 634
 635    ctx->Const.ForceGLSLExtensionsWarn =
 636       driQueryOptionb(options, "force_glsl_extensions_warn");
 637
 638    ctx->Const.DisableGLSLLineContinuations =
 639       driQueryOptionb(options, "disable_glsl_line_continuations");
 640
 641    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 642       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 643 }
 644
 645 GLboolean
 646 brwCreateContext(gl_api api,
 647                  const struct gl_config *mesaVis,
 648                  __DRIcontext *driContextPriv,
 649                  unsigned major_version,
 650                  unsigned minor_version,
 651                  uint32_t flags,
 652                  bool notify_reset,
 653                  unsigned *dri_ctx_error,
 654                  void *sharedContextPrivate)
 655 {
 656    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 657    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 658    struct intel_screen *screen = sPriv->driverPrivate;
 659    const struct brw_device_info *devinfo = screen->devinfo;
 660    struct dd_function_table functions;
 661
 662    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 663     * provides us with context reset notifications.
 664     */
 665    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 666       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 667
 668    if (screen->has_context_reset_notification)
 669       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 670
 671    if (flags & ~allowed_flags) {
 672       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 673       return false;
 674    }
 675
 676    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 677    if (!brw) {
 678       fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
 679       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 680       return false;
 681    }
 682
 683    driContextPriv->driverPrivate = brw;
 684    brw->driContext = driContextPriv;
 685    brw->intelScreen = screen;
 686    brw->bufmgr = screen->bufmgr;
 687
 688    brw->gen = devinfo->gen;
 689    brw->gt = devinfo->gt;
 690    brw->is_g4x = devinfo->is_g4x;
 691    brw->is_baytrail = devinfo->is_baytrail;
 692    brw->is_haswell = devinfo->is_haswell;
 693    brw->is_cherryview = devinfo->is_cherryview;
 694    brw->has_llc = devinfo->has_llc;
 695    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 696    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 697    brw->has_pln = devinfo->has_pln;
 698    brw->has_compr4 = devinfo->has_compr4;
 699    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 700    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 701    brw->needs_unlit_centroid_workaround =
 702       devinfo->needs_unlit_centroid_workaround;
 703
 704    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 705    brw->has_swizzling = screen->hw_has_swizzling;
 706
 707    brw->vs.base.stage = MESA_SHADER_VERTEX;
 708    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 709    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 710    if (brw->gen >= 8) {
 711       gen8_init_vtable_surface_functions(brw);
 712       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 713    } else if (brw->gen >= 7) {
 714       gen7_init_vtable_surface_functions(brw);
 715       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 716    } else if (brw->gen >= 6) {
 717       gen6_init_vtable_surface_functions(brw);
 718       brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
 719    } else {
 720       gen4_init_vtable_surface_functions(brw);
 721       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 722    }
 723
 724    brw_init_driver_functions(brw, &functions);
 725
 726    if (notify_reset)
 727       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 728
 729    struct gl_context *ctx = &brw->ctx;
 730
 731    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 732       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 733       fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
 734       intelDestroyContext(driContextPriv);
 735       return false;
 736    }
 737
 738    driContextSetFlags(ctx, flags);
 739
 740    /* Initialize the software rasterizer and helper modules.
 741     *
 742     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 743     * software fallbacks (which we have to support on legacy GL to do weird
 744     * glDrawPixels(), glBitmap(), and other functions).
 745     */
 746    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 747       _swrast_CreateContext(ctx);
 748    }
 749
 750    _vbo_CreateContext(ctx);
 751    if (ctx->swrast_context) {
 752       _tnl_CreateContext(ctx);
 753       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 754       _swsetup_CreateContext(ctx);
 755
 756       /* Configure swrast to match hardware characteristics: */
 757       _swrast_allow_pixel_fog(ctx, false);
 758       _swrast_allow_vertex_fog(ctx, true);
 759    }
 760
 761    _mesa_meta_init(ctx);
 762
 763    brw_process_driconf_options(brw);
 764    brw_process_intel_debug_variable(brw);
 765    brw_initialize_context_constants(brw);
 766
 767    ctx->Const.ResetStrategy = notify_reset
 768       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 769
 770    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 771    _mesa_init_point(ctx);
 772
 773    intel_fbo_init(brw);
 774
 775    intel_batchbuffer_init(brw);
 776
 777    if (brw->gen >= 6) {
 778       /* Create a new hardware context.  Using a hardware context means that
 779        * our GPU state will be saved/restored on context switch, allowing us
 780        * to assume that the GPU is in the same state we left it in.
 781        *
 782        * This is required for transform feedback buffer offsets, query objects,
 783        * and also allows us to reduce how much state we have to emit.
 784        */
 785       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 786
 787       if (!brw->hw_ctx) {
 788          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 789          intelDestroyContext(driContextPriv);
 790          return false;
 791       }
 792    }
 793
 794    brw_init_state(brw);
 795
 796    intelInitExtensions(ctx);
 797
 798    brw_init_surface_formats(brw);
 799
 800    brw->max_vs_threads = devinfo->max_vs_threads;
 801    brw->max_gs_threads = devinfo->max_gs_threads;
 802    brw->max_wm_threads = devinfo->max_wm_threads;
 803    brw->urb.size = devinfo->urb.size;
 804    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 805    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 806    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 807
 808    /* Estimate the size of the mappable aperture into the GTT.  There's an
 809     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 810     * It turns out it's basically always 256MB, though some ancient hardware
 811     * was smaller.
 812     */
 813    uint32_t gtt_size = 256 * 1024 * 1024;
 814
 815    /* We don't want to map two objects such that a memcpy between them would
 816     * just fault one mapping in and then the other over and over forever.  So
 817     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 818     * taken up by things like the framebuffer and the ringbuffer and such, so
 819     * be more conservative.
 820     */
 821    brw->max_gtt_map_object_size = gtt_size / 4;
 822
 823    if (brw->gen == 6)
 824       brw->urb.gs_present = false;
 825
 826    brw->prim_restart.in_progress = false;
 827    brw->prim_restart.enable_cut_index = false;
 828    brw->gs.enabled = false;
 829    brw->sf.viewport_transform_enable = true;
 830
 831    ctx->VertexProgram._MaintainTnlProgram = true;
 832    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 833
 834    brw_draw_init( brw );
 835
 836    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 837       /* Turn on some extra GL_ARB_debug_output generation. */
 838       brw->perf_debug = true;
 839    }
 840
 841    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 842       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 843
 844    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 845       brw_init_shader_time(brw);
 846
 847    _mesa_compute_version(ctx);
 848
 849    _mesa_initialize_dispatch_tables(ctx);
 850    _mesa_initialize_vbo_vtxfmt(ctx);
 851
 852    if (ctx->Extensions.AMD_performance_monitor) {
 853       brw_init_performance_monitors(brw);
 854    }
 855
 856    vbo_use_buffer_objects(ctx);
 857    vbo_always_unmap_buffers(ctx);
 858
 859    return true;
 860 }
 861
 862 void
 863 intelDestroyContext(__DRIcontext * driContextPriv)
 864 {
 865    struct brw_context *brw =
 866       (struct brw_context *) driContextPriv->driverPrivate;
 867    struct gl_context *ctx = &brw->ctx;
 868
 869    assert(brw); /* should never be null */
 870    if (!brw)
 871       return;
 872
 873    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 874    if (INTEL_DEBUG & DEBUG_AUB) {
 875       intel_batchbuffer_flush(brw);
 876       aub_dump_bmp(&brw->ctx);
 877    }
 878
 879    _mesa_meta_free(&brw->ctx);
 880    brw_meta_fast_clear_free(brw);
 881
 882    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 883       /* Force a report. */
 884       brw->shader_time.report_time = 0;
 885
 886       brw_collect_and_report_shader_time(brw);
 887       brw_destroy_shader_time(brw);
 888    }
 889
 890    brw_destroy_state(brw);
 891    brw_draw_destroy(brw);
 892
 893    drm_intel_bo_unreference(brw->curbe.curbe_bo);
 894
 895    drm_intel_gem_context_destroy(brw->hw_ctx);
 896
 897    if (ctx->swrast_context) {
 898       _swsetup_DestroyContext(&brw->ctx);
 899       _tnl_DestroyContext(&brw->ctx);
 900    }
 901    _vbo_DestroyContext(&brw->ctx);
 902
 903    if (ctx->swrast_context)
 904       _swrast_DestroyContext(&brw->ctx);
 905
 906    intel_batchbuffer_free(brw);
 907
 908    drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
 909    brw->first_post_swapbuffers_batch = NULL;
 910
 911    driDestroyOptionCache(&brw->optionCache);
 912
 913    /* free the Mesa context */
 914    _mesa_free_context_data(&brw->ctx);
 915
 916    ralloc_free(brw);
 917    driContextPriv->driverPrivate = NULL;
 918 }
 919
 920 GLboolean
 921 intelUnbindContext(__DRIcontext * driContextPriv)
 922 {
 923    /* Unset current context and dispath table */
 924    _mesa_make_current(NULL, NULL, NULL);
 925
 926    return true;
 927 }
 928
 929 /**
 930  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 931  * on window system framebuffers.
 932  *
 933  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 934  * your renderbuffer can do sRGB encode, and you can flip a switch that does
 935  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 936  * for a visual where you're guaranteed to be capable, but it turns out that
 937  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 938  * incapable ones, becuase there's no difference between the two in resources
 939  * used.  Applications thus get built that accidentally rely on the default
 940  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 941  * great...
 942  *
 943  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 944  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 945  * So they removed the enable knob and made it "if the renderbuffer is sRGB
 946  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 947  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 948  * and get no sRGB encode (assuming that both kinds of visual are available).
 949  * Thus our choice to support sRGB by default on our visuals for desktop would
 950  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 951  *
 952  * Unfortunately, renderbuffer setup happens before a context is created.  So
 953  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 954  * context (without an sRGB visual, though we don't have sRGB visuals exposed
 955  * yet), we go turn that back off before anyone finds out.
 956  */
 957 static void
 958 intel_gles3_srgb_workaround(struct brw_context *brw,
 959                             struct gl_framebuffer *fb)
 960 {
 961    struct gl_context *ctx = &brw->ctx;
 962
 963    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
 964       return;
 965
 966    /* Some day when we support the sRGB capable bit on visuals available for
 967     * GLES, we'll need to respect that and not disable things here.
 968     */
 969    fb->Visual.sRGBCapable = false;
 970    for (int i = 0; i < BUFFER_COUNT; i++) {
 971       if (fb->Attachment[i].Renderbuffer &&
 972           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
 973          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
 974       }
 975    }
 976 }
 977
 978 GLboolean
 979 intelMakeCurrent(__DRIcontext * driContextPriv,
 980                  __DRIdrawable * driDrawPriv,
 981                  __DRIdrawable * driReadPriv)
 982 {
 983    struct brw_context *brw;
 984    GET_CURRENT_CONTEXT(curCtx);
 985
 986    if (driContextPriv)
 987       brw = (struct brw_context *) driContextPriv->driverPrivate;
 988    else
 989       brw = NULL;
 990
 991    /* According to the glXMakeCurrent() man page: "Pending commands to
 992     * the previous context, if any, are flushed before it is released."
 993     * But only flush if we're actually changing contexts.
 994     */
 995    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
 996       _mesa_flush(curCtx);
 997    }
 998
 999    if (driContextPriv) {
1000       struct gl_context *ctx = &brw->ctx;
1001       struct gl_framebuffer *fb, *readFb;
1002
1003       if (driDrawPriv == NULL) {
1004          fb = _mesa_get_incomplete_framebuffer();
1005       } else {
1006          fb = driDrawPriv->driverPrivate;
1007          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1008       }
1009
1010       if (driReadPriv == NULL) {
1011          readFb = _mesa_get_incomplete_framebuffer();
1012       } else {
1013          readFb = driReadPriv->driverPrivate;
1014          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1015       }
1016
1017       /* The sRGB workaround changes the renderbuffer's format. We must change
1018        * the format before the renderbuffer's miptree get's allocated, otherwise
1019        * the formats of the renderbuffer and its miptree will differ.
1020        */
1021       intel_gles3_srgb_workaround(brw, fb);
1022       intel_gles3_srgb_workaround(brw, readFb);
1023
1024       /* If the context viewport hasn't been initialized, force a call out to
1025        * the loader to get buffers so we have a drawable size for the initial
1026        * viewport. */
1027       if (!brw->ctx.ViewportInitialized)
1028          intel_prepare_render(brw);
1029
1030       _mesa_make_current(ctx, fb, readFb);
1031    } else {
1032       _mesa_make_current(NULL, NULL, NULL);
1033    }
1034
1035    return true;
1036 }
1037
1038 void
1039 intel_resolve_for_dri2_flush(struct brw_context *brw,
1040                              __DRIdrawable *drawable)
1041 {
1042    if (brw->gen < 6) {
1043       /* MSAA and fast color clear are not supported, so don't waste time
1044        * checking whether a resolve is needed.
1045        */
1046       return;
1047    }
1048
1049    struct gl_framebuffer *fb = drawable->driverPrivate;
1050    struct intel_renderbuffer *rb;
1051
1052    /* Usually, only the back buffer will need to be downsampled. However,
1053     * the front buffer will also need it if the user has rendered into it.
1054     */
1055    static const gl_buffer_index buffers[2] = {
1056          BUFFER_BACK_LEFT,
1057          BUFFER_FRONT_LEFT,
1058    };
1059
1060    for (int i = 0; i < 2; ++i) {
1061       rb = intel_get_renderbuffer(fb, buffers[i]);
1062       if (rb == NULL || rb->mt == NULL)
1063          continue;
1064       if (rb->mt->num_samples <= 1)
1065          intel_miptree_resolve_color(brw, rb->mt);
1066       else
1067          intel_renderbuffer_downsample(brw, rb);
1068    }
1069 }
1070
/** Return the number of bits per pixel of the renderbuffer's format. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   const int bytes_per_pixel = _mesa_get_format_bytes(intel_rb_format(rb));

   return bytes_per_pixel * 8;
}
1076
1077 static void
1078 intel_query_dri2_buffers(struct brw_context *brw,
1079                          __DRIdrawable *drawable,
1080                          __DRIbuffer **buffers,
1081                          int *count);
1082
1083 static void
1084 intel_process_dri2_buffer(struct brw_context *brw,
1085                           __DRIdrawable *drawable,
1086                           __DRIbuffer *buffer,
1087                           struct intel_renderbuffer *rb,
1088                           const char *buffer_name);
1089
1090 static void
1091 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1092
1093 static void
1094 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1095 {
1096    struct gl_framebuffer *fb = drawable->driverPrivate;
1097    struct intel_renderbuffer *rb;
1098    __DRIbuffer *buffers = NULL;
1099    int i, count;
1100    const char *region_name;
1101
1102    /* Set this up front, so that in case our buffers get invalidated
1103     * while we're getting new buffers, we don't clobber the stamp and
1104     * thus ignore the invalidate. */
1105    drawable->lastStamp = drawable->dri2.stamp;
1106
1107    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1108       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1109
1110    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1111
1112    if (buffers == NULL)
1113       return;
1114
1115    for (i = 0; i < count; i++) {
1116        switch (buffers[i].attachment) {
1117        case __DRI_BUFFER_FRONT_LEFT:
1118            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1119            region_name = "dri2 front buffer";
1120            break;
1121
1122        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1123            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1124            region_name = "dri2 fake front buffer";
1125            break;
1126
1127        case __DRI_BUFFER_BACK_LEFT:
1128            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1129            region_name = "dri2 back buffer";
1130            break;
1131
1132        case __DRI_BUFFER_DEPTH:
1133        case __DRI_BUFFER_HIZ:
1134        case __DRI_BUFFER_DEPTH_STENCIL:
1135        case __DRI_BUFFER_STENCIL:
1136        case __DRI_BUFFER_ACCUM:
1137        default:
1138            fprintf(stderr,
1139                    "unhandled buffer attach event, attachment type %d\n",
1140                    buffers[i].attachment);
1141            return;
1142        }
1143
1144        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1145    }
1146
1147 }
1148
1149 void
1150 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1151 {
1152    struct brw_context *brw = context->driverPrivate;
1153    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1154
1155    /* Set this up front, so that in case our buffers get invalidated
1156     * while we're getting new buffers, we don't clobber the stamp and
1157     * thus ignore the invalidate. */
1158    drawable->lastStamp = drawable->dri2.stamp;
1159
1160    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1161       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1162
1163    if (screen->image.loader)
1164       intel_update_image_buffers(brw, drawable);
1165    else
1166       intel_update_dri2_buffers(brw, drawable);
1167
1168    driUpdateFramebufferSize(&brw->ctx, drawable);
1169 }
1170
1171 /**
1172  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1173  * state is required.
1174  */
1175 void
1176 intel_prepare_render(struct brw_context *brw)
1177 {
1178    struct gl_context *ctx = &brw->ctx;
1179    __DRIcontext *driContext = brw->driContext;
1180    __DRIdrawable *drawable;
1181
1182    drawable = driContext->driDrawablePriv;
1183    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1184       if (drawable->lastStamp != drawable->dri2.stamp)
1185          intel_update_renderbuffers(driContext, drawable);
1186       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1187    }
1188
1189    drawable = driContext->driReadablePriv;
1190    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1191       if (drawable->lastStamp != drawable->dri2.stamp)
1192          intel_update_renderbuffers(driContext, drawable);
1193       driContext->dri2.read_stamp = drawable->dri2.stamp;
1194    }
1195
1196    /* If we're currently rendering to the front buffer, the rendering
1197     * that will happen next will probably dirty the front buffer.  So
1198     * mark it as dirty here.
1199     */
1200    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
1201       brw->front_buffer_dirty = true;
1202
1203    /* Wait for the swapbuffers before the one we just emitted, so we
1204     * don't get too many swaps outstanding for apps that are GPU-heavy
1205     * but not CPU-heavy.
1206     *
1207     * We're using intelDRI2Flush (called from the loader before
1208     * swapbuffer) and glFlush (for front buffer rendering) as the
1209     * indicator that a frame is done and then throttle when we get
1210     * here as we prepare to render the next frame.  At this point for
1211     * round trips for swap/copy and getting new buffers are done and
1212     * we'll spend less time waiting on the GPU.
1213     *
1214     * Unfortunately, we don't have a handle to the batch containing
1215     * the swap, and getting our hands on that doesn't seem worth it,
1216     * so we just us the first batch we emitted after the last swap.
1217     */
1218    if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
1219       if (!brw->disable_throttling)
1220          drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
1221       drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
1222       brw->first_post_swapbuffers_batch = NULL;
1223       brw->need_throttle = false;
1224    }
1225 }
1226
1227 /**
1228  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1229  *
1230  * To determine which DRI buffers to request, examine the renderbuffers
1231  * attached to the drawable's framebuffer. Then request the buffers with
1232  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1233  *
1234  * This is called from intel_update_renderbuffers().
1235  *
1236  * \param drawable      Drawable whose buffers are queried.
1237  * \param buffers       [out] List of buffers returned by DRI2 query.
1238  * \param buffer_count  [out] Number of buffers returned.
1239  *
1240  * \see intel_update_renderbuffers()
1241  * \see DRI2GetBuffers()
1242  * \see DRI2GetBuffersWithFormat()
1243  */
1244 static void
1245 intel_query_dri2_buffers(struct brw_context *brw,
1246                          __DRIdrawable *drawable,
1247                          __DRIbuffer **buffers,
1248                          int *buffer_count)
1249 {
1250    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1251    struct gl_framebuffer *fb = drawable->driverPrivate;
1252    int i = 0;
1253    unsigned attachments[8];
1254
1255    struct intel_renderbuffer *front_rb;
1256    struct intel_renderbuffer *back_rb;
1257
1258    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1259    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1260
1261    memset(attachments, 0, sizeof(attachments));
1262    if ((brw_is_front_buffer_drawing(fb) ||
1263         brw_is_front_buffer_reading(fb) ||
1264         !back_rb) && front_rb) {
1265       /* If a fake front buffer is in use, then querying for
1266        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1267        * the real front buffer to the fake front buffer.  So before doing the
1268        * query, we need to make sure all the pending drawing has landed in the
1269        * real front buffer.
1270        */
1271       intel_batchbuffer_flush(brw);
1272       intel_flush_front(&brw->ctx);
1273
1274       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1275       attachments[i++] = intel_bits_per_pixel(front_rb);
1276    } else if (front_rb && brw->front_buffer_dirty) {
1277       /* We have pending front buffer rendering, but we aren't querying for a
1278        * front buffer.  If the front buffer we have is a fake front buffer,
1279        * the X server is going to throw it away when it processes the query.
1280        * So before doing the query, make sure all the pending drawing has
1281        * landed in the real front buffer.
1282        */
1283       intel_batchbuffer_flush(brw);
1284       intel_flush_front(&brw->ctx);
1285    }
1286
1287    if (back_rb) {
1288       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1289       attachments[i++] = intel_bits_per_pixel(back_rb);
1290    }
1291
1292    assert(i <= ARRAY_SIZE(attachments));
1293
1294    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1295                                                         &drawable->w,
1296                                                         &drawable->h,
1297                                                         attachments, i / 2,
1298                                                         buffer_count,
1299                                                         drawable->loaderPrivate);
1300 }
1301
1302 /**
1303  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1304  *
1305  * This is called from intel_update_renderbuffers().
1306  *
1307  * \par Note:
1308  *    DRI buffers whose attachment point is DRI2BufferStencil or
1309  *    DRI2BufferDepthStencil are handled as special cases.
1310  *
1311  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1312  *        that is passed to drm_intel_bo_gem_create_from_name().
1313  *
1314  * \see intel_update_renderbuffers()
1315  */
1316 static void
1317 intel_process_dri2_buffer(struct brw_context *brw,
1318                           __DRIdrawable *drawable,
1319                           __DRIbuffer *buffer,
1320                           struct intel_renderbuffer *rb,
1321                           const char *buffer_name)
1322 {
1323    struct gl_framebuffer *fb = drawable->driverPrivate;
1324    drm_intel_bo *bo;
1325
1326    if (!rb)
1327       return;
1328
1329    unsigned num_samples = rb->Base.Base.NumSamples;
1330
1331    /* We try to avoid closing and reopening the same BO name, because the first
1332     * use of a mapping of the buffer involves a bunch of page faulting which is
1333     * moderately expensive.
1334     */
1335    struct intel_mipmap_tree *last_mt;
1336    if (num_samples == 0)
1337       last_mt = rb->mt;
1338    else
1339       last_mt = rb->singlesample_mt;
1340
1341    uint32_t old_name = 0;
1342    if (last_mt) {
1343        /* The bo already has a name because the miptree was created by a
1344         * previous call to intel_process_dri2_buffer(). If a bo already has a
1345         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1346         * create a new name.
1347         */
1348       drm_intel_bo_flink(last_mt->bo, &old_name);
1349    }
1350
1351    if (old_name == buffer->name)
1352       return;
1353
1354    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1355       fprintf(stderr,
1356               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1357               buffer->name, buffer->attachment,
1358               buffer->cpp, buffer->pitch);
1359    }
1360
1361    intel_miptree_release(&rb->mt);
1362    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1363                                           buffer->name);
1364    if (!bo) {
1365       fprintf(stderr,
1366               "Failed to open BO for returned DRI2 buffer "
1367               "(%dx%d, %s, named %d).\n"
1368               "This is likely a bug in the X Server that will lead to a "
1369               "crash soon.\n",
1370               drawable->w, drawable->h, buffer_name, buffer->name);
1371       return;
1372    }
1373
1374    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1375                                             drawable->w, drawable->h,
1376                                             buffer->pitch);
1377
1378    if (brw_is_front_buffer_drawing(fb) &&
1379        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1380         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1381        rb->Base.Base.NumSamples > 1) {
1382       intel_renderbuffer_upsample(brw, rb);
1383    }
1384
1385    assert(rb->mt);
1386
1387    drm_intel_bo_unreference(bo);
1388 }
1389
1390 /**
1391  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1392  *
1393  * To determine which DRI buffers to request, examine the renderbuffers
1394  * attached to the drawable's framebuffer. Then request the buffers from
1395  * the image loader
1396  *
1397  * This is called from intel_update_renderbuffers().
1398  *
1399  * \param drawable      Drawable whose buffers are queried.
1400  * \param buffers       [out] List of buffers returned by DRI2 query.
1401  * \param buffer_count  [out] Number of buffers returned.
1402  *
1403  * \see intel_update_renderbuffers()
1404  */
1405
1406 static void
1407 intel_update_image_buffer(struct brw_context *intel,
1408                           __DRIdrawable *drawable,
1409                           struct intel_renderbuffer *rb,
1410                           __DRIimage *buffer,
1411                           enum __DRIimageBufferMask buffer_type)
1412 {
1413    struct gl_framebuffer *fb = drawable->driverPrivate;
1414
1415    if (!rb || !buffer->bo)
1416       return;
1417
1418    unsigned num_samples = rb->Base.Base.NumSamples;
1419
1420    /* Check and see if we're already bound to the right
1421     * buffer object
1422     */
1423    struct intel_mipmap_tree *last_mt;
1424    if (num_samples == 0)
1425       last_mt = rb->mt;
1426    else
1427       last_mt = rb->singlesample_mt;
1428
1429    if (last_mt && last_mt->bo == buffer->bo)
1430       return;
1431
1432    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1433                                             buffer->width, buffer->height,
1434                                             buffer->pitch);
1435
1436    if (brw_is_front_buffer_drawing(fb) &&
1437        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1438        rb->Base.Base.NumSamples > 1) {
1439       intel_renderbuffer_upsample(intel, rb);
1440    }
1441 }
1442
1443 static void
1444 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1445 {
1446    struct gl_framebuffer *fb = drawable->driverPrivate;
1447    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1448    struct intel_renderbuffer *front_rb;
1449    struct intel_renderbuffer *back_rb;
1450    struct __DRIimageList images;
1451    unsigned int format;
1452    uint32_t buffer_mask = 0;
1453
1454    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1455    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1456
1457    if (back_rb)
1458       format = intel_rb_format(back_rb);
1459    else if (front_rb)
1460       format = intel_rb_format(front_rb);
1461    else
1462       return;
1463
1464    if (front_rb && (brw_is_front_buffer_drawing(fb) ||
1465                     brw_is_front_buffer_reading(fb) || !back_rb)) {
1466       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1467    }
1468
1469    if (back_rb)
1470       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1471
1472    (*screen->image.loader->getBuffers) (drawable,
1473                                         driGLFormatToImageFormat(format),
1474                                         &drawable->dri2.stamp,
1475                                         drawable->loaderPrivate,
1476                                         buffer_mask,
1477                                         &images);
1478
1479    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1480       drawable->w = images.front->width;
1481       drawable->h = images.front->height;
1482       intel_update_image_buffer(brw,
1483                                 drawable,
1484                                 front_rb,
1485                                 images.front,
1486                                 __DRI_IMAGE_BUFFER_FRONT);
1487    }
1488    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1489       drawable->w = images.back->width;
1490       drawable->h = images.back->height;
1491       intel_update_image_buffer(brw,
1492                                 drawable,
1493                                 back_rb,
1494                                 images.back,
1495                                 __DRI_IMAGE_BUFFER_BACK);
1496    }
1497 }