src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43 #include "main/texobj.h"
  44
  45 #include "vbo/vbo_context.h"
  46
  47 #include "drivers/common/driverfuncs.h"
  48 #include "drivers/common/meta.h"
  49 #include "utils.h"
  50
  51 #include "brw_context.h"
  52 #include "brw_defines.h"
  53 #include "brw_draw.h"
  54 #include "brw_state.h"
  55
  56 #include "intel_batchbuffer.h"
  57 #include "intel_buffer_objects.h"
  58 #include "intel_buffers.h"
  59 #include "intel_fbo.h"
  60 #include "intel_mipmap_tree.h"
  61 #include "intel_pixel.h"
  62 #include "intel_image.h"
  63 #include "intel_tex.h"
  64 #include "intel_tex_obj.h"
  65
  66 #include "swrast_setup/swrast_setup.h"
  67 #include "tnl/tnl.h"
  68 #include "tnl/t_pipeline.h"
  69 #include "util/ralloc.h"
  70
  71 #include "glsl/nir/nir.h"
  72
  73 /***************************************
  74  * Mesa's Driver Functions
  75  ***************************************/
  76
  77 static size_t
  78 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  79                              GLenum internalFormat, int samples[16])
  80 {
  81    struct brw_context *brw = brw_context(ctx);
  82
  83    (void) target;
  84
  85    switch (brw->gen) {
  86    case 9:
  87    case 8:
  88       samples[0] = 8;
  89       samples[1] = 4;
  90       samples[2] = 2;
  91       return 3;
  92
  93    case 7:
  94       samples[0] = 8;
  95       samples[1] = 4;
  96       return 2;
  97
  98    case 6:
  99       samples[0] = 4;
 100       return 1;
 101
 102    default:
 103       assert(brw->gen < 6);
 104       samples[0] = 1;
 105       return 1;
 106    }
 107 }
 108
 109 const char *const brw_vendor_string = "Intel Open Source Technology Center";
 110
 111 const char *
 112 brw_get_renderer_string(unsigned deviceID)
 113 {
 114    const char *chipset;
 115    static char buffer[128];
 116
 117    switch (deviceID) {
 118 #undef CHIPSET
 119 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
 120 #include "pci_ids/i965_pci_ids.h"
 121    default:
 122       chipset = "Unknown Intel Chipset";
 123       break;
 124    }
 125
 126    (void) driGetRendererString(buffer, chipset, 0);
 127    return buffer;
 128 }
 129
 130 static const GLubyte *
 131 intel_get_string(struct gl_context * ctx, GLenum name)
 132 {
 133    const struct brw_context *const brw = brw_context(ctx);
 134
 135    switch (name) {
 136    case GL_VENDOR:
 137       return (GLubyte *) brw_vendor_string;
 138
 139    case GL_RENDERER:
 140       return
 141          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 142
 143    default:
 144       return NULL;
 145    }
 146 }
 147
 148 static void
 149 intel_viewport(struct gl_context *ctx)
 150 {
 151    struct brw_context *brw = brw_context(ctx);
 152    __DRIcontext *driContext = brw->driContext;
 153
 154    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 155       dri2InvalidateDrawable(driContext->driDrawablePriv);
 156       dri2InvalidateDrawable(driContext->driReadablePriv);
 157    }
 158 }
 159
 160 static void
 161 intel_update_state(struct gl_context * ctx, GLuint new_state)
 162 {
 163    struct brw_context *brw = brw_context(ctx);
 164    struct intel_texture_object *tex_obj;
 165    struct intel_renderbuffer *depth_irb;
 166
 167    if (ctx->swrast_context)
 168       _swrast_InvalidateState(ctx, new_state);
 169    _vbo_InvalidateState(ctx, new_state);
 170
 171    brw->NewGLState |= new_state;
 172
 173    _mesa_unlock_context_textures(ctx);
 174
 175    /* Resolve the depth buffer's HiZ buffer. */
 176    depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
 177    if (depth_irb)
 178       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 179
 180    /* Resolve depth buffer and render cache of each enabled texture. */
 181    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
 182    for (int i = 0; i <= maxEnabledUnit; i++) {
 183       if (!ctx->Texture.Unit[i]._Current)
 184          continue;
 185       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
 186       if (!tex_obj || !tex_obj->mt)
 187          continue;
 188       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
 189       intel_miptree_resolve_color(brw, tex_obj->mt);
 190       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 191    }
 192
 193    _mesa_lock_context_textures(ctx);
 194 }
 195
 196 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 197
 198 static void
 199 intel_flush_front(struct gl_context *ctx)
 200 {
 201    struct brw_context *brw = brw_context(ctx);
 202    __DRIcontext *driContext = brw->driContext;
 203    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 204    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 205
 206    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 207       if (flushFront(screen) && driDrawable &&
 208           driDrawable->loaderPrivate) {
 209
 210          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 211           *
 212           * This potentially resolves both front and back buffer. It
 213           * is unnecessary to resolve the back, but harms nothing except
 214           * performance. And no one cares about front-buffer render
 215           * performance.
 216           */
 217          intel_resolve_for_dri2_flush(brw, driDrawable);
 218          intel_batchbuffer_flush(brw);
 219
 220          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 221
 222          /* We set the dirty bit in intel_prepare_render() if we're
 223           * front buffer rendering once we get there.
 224           */
 225          brw->front_buffer_dirty = false;
 226       }
 227    }
 228 }
 229
 230 static void
 231 intel_glFlush(struct gl_context *ctx)
 232 {
 233    struct brw_context *brw = brw_context(ctx);
 234
 235    intel_batchbuffer_flush(brw);
 236    intel_flush_front(ctx);
 237
 238    brw->need_flush_throttle = true;
 239 }
 240
 241 static void
 242 intel_finish(struct gl_context * ctx)
 243 {
 244    struct brw_context *brw = brw_context(ctx);
 245
 246    intel_glFlush(ctx);
 247
 248    if (brw->batch.last_bo)
 249       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 250 }
 251
 252 static void
 253 brw_init_driver_functions(struct brw_context *brw,
 254                           struct dd_function_table *functions)
 255 {
 256    _mesa_init_driver_functions(functions);
 257
 258    /* GLX uses DRI2 invalidate events to handle window resizing.
 259     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 260     * which doesn't provide a mechanism for snooping the event queues.
 261     *
 262     * So EGL still relies on viewport hacks to handle window resizing.
 263     * This should go away with DRI3000.
 264     */
 265    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 266       functions->Viewport = intel_viewport;
 267
 268    functions->Flush = intel_glFlush;
 269    functions->Finish = intel_finish;
 270    functions->GetString = intel_get_string;
 271    functions->UpdateState = intel_update_state;
 272
 273    intelInitTextureFuncs(functions);
 274    intelInitTextureImageFuncs(functions);
 275    intelInitTextureSubImageFuncs(functions);
 276    intelInitTextureCopyImageFuncs(functions);
 277    intelInitCopyImageFuncs(functions);
 278    intelInitClearFuncs(functions);
 279    intelInitBufferFuncs(functions);
 280    intelInitPixelFuncs(functions);
 281    intelInitBufferObjectFuncs(functions);
 282    intel_init_syncobj_functions(functions);
 283    brw_init_object_purgeable_functions(functions);
 284
 285    brwInitFragProgFuncs( functions );
 286    brw_init_common_queryobj_functions(functions);
 287    if (brw->gen >= 6)
 288       gen6_init_queryobj_functions(functions);
 289    else
 290       gen4_init_queryobj_functions(functions);
 291
 292    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 293
 294    functions->NewTransformFeedback = brw_new_transform_feedback;
 295    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 296    functions->GetTransformFeedbackVertexCount =
 297       brw_get_transform_feedback_vertex_count;
 298    if (brw->gen >= 7) {
 299       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 300       functions->EndTransformFeedback = gen7_end_transform_feedback;
 301       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 302       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 303    } else {
 304       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 305       functions->EndTransformFeedback = brw_end_transform_feedback;
 306    }
 307
 308    if (brw->gen >= 6)
 309       functions->GetSamplePosition = gen6_get_sample_position;
 310 }
 311
 312 static void
 313 brw_initialize_context_constants(struct brw_context *brw)
 314 {
 315    struct gl_context *ctx = &brw->ctx;
 316
 317    unsigned max_samplers =
 318       brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
 319
 320    ctx->Const.QueryCounterBits.Timestamp = 36;
 321
 322    ctx->Const.StripTextureBorder = true;
 323
 324    ctx->Const.MaxDualSourceDrawBuffers = 1;
 325    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 326    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
 327    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 328    ctx->Const.MaxTextureUnits =
 329       MIN2(ctx->Const.MaxTextureCoordUnits,
 330            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 331    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
 332    if (brw->gen >= 6)
 333       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
 334    else
 335       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
 336    if (_mesa_extension_override_enables.ARB_compute_shader) {
 337       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
 338       ctx->Const.MaxUniformBufferBindings += 12;
 339    } else {
 340       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
 341    }
 342    ctx->Const.MaxCombinedTextureImageUnits =
 343       ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
 344       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
 345       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
 346       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
 347
 348    ctx->Const.MaxTextureLevels = 14; /* 8192 */
 349    if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
 350       ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
 351    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
 352    ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
 353    ctx->Const.MaxTextureMbytes = 1536;
 354
 355    if (brw->gen >= 7)
 356       ctx->Const.MaxArrayTextureLayers = 2048;
 357    else
 358       ctx->Const.MaxArrayTextureLayers = 512;
 359
 360    ctx->Const.MaxTextureRectSize = 1 << 12;
 361
 362    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 363
 364    ctx->Const.MaxRenderbufferSize = 8192;
 365
 366    /* Hardware only supports a limited number of transform feedback buffers.
 367     * So we need to override the Mesa default (which is based only on software
 368     * limits).
 369     */
 370    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 371
 372    /* On Gen6, in the worst case, we use up one binding table entry per
 373     * transform feedback component (see comments above the definition of
 374     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
 375     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
 376     * BRW_MAX_SOL_BINDINGS.
 377     *
 378     * In "separate components" mode, we need to divide this value by
 379     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
 380     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
 381     */
 382    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
 383    ctx->Const.MaxTransformFeedbackSeparateComponents =
 384       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 385
 386    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
 387
 388    int max_samples;
 389    const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
 390    const int clamp_max_samples =
 391       driQueryOptioni(&brw->optionCache, "clamp_max_samples");
 392
 393    if (clamp_max_samples < 0) {
 394       max_samples = msaa_modes[0];
 395    } else {
 396       /* Select the largest supported MSAA mode that does not exceed
 397        * clamp_max_samples.
 398        */
 399       max_samples = 0;
 400       for (int i = 0; msaa_modes[i] != 0; ++i) {
 401          if (msaa_modes[i] <= clamp_max_samples) {
 402             max_samples = msaa_modes[i];
 403             break;
 404          }
 405       }
 406    }
 407
 408    ctx->Const.MaxSamples = max_samples;
 409    ctx->Const.MaxColorTextureSamples = max_samples;
 410    ctx->Const.MaxDepthTextureSamples = max_samples;
 411    ctx->Const.MaxIntegerSamples = max_samples;
 412
 413    /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
 414     * to map indices of rectangular grid to sample numbers within a pixel.
 415     * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
 416     * extension implementation. For more details see the comment above
 417     * gen6_set_sample_maps() definition.
 418     */
 419    gen6_set_sample_maps(ctx);
 420
 421    if (brw->gen >= 7)
 422       ctx->Const.MaxProgramTextureGatherComponents = 4;
 423    else if (brw->gen == 6)
 424       ctx->Const.MaxProgramTextureGatherComponents = 1;
 425
 426    ctx->Const.MinLineWidth = 1.0;
 427    ctx->Const.MinLineWidthAA = 1.0;
 428    if (brw->gen >= 9 || brw->is_cherryview) {
 429       ctx->Const.MaxLineWidth = 40.0;
 430       ctx->Const.MaxLineWidthAA = 40.0;
 431       ctx->Const.LineWidthGranularity = 0.125;
 432    } else if (brw->gen >= 6) {
 433       ctx->Const.MaxLineWidth = 7.375;
 434       ctx->Const.MaxLineWidthAA = 7.375;
 435       ctx->Const.LineWidthGranularity = 0.125;
 436    } else {
 437       ctx->Const.MaxLineWidth = 7.0;
 438       ctx->Const.MaxLineWidthAA = 7.0;
 439       ctx->Const.LineWidthGranularity = 0.5;
 440    }
 441
 442    ctx->Const.MinPointSize = 1.0;
 443    ctx->Const.MinPointSizeAA = 1.0;
 444    ctx->Const.MaxPointSize = 255.0;
 445    ctx->Const.MaxPointSizeAA = 255.0;
 446    ctx->Const.PointSizeGranularity = 1.0;
 447
 448    if (brw->gen >= 5 || brw->is_g4x)
 449       ctx->Const.MaxClipPlanes = 8;
 450
 451    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
 452    ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
 453    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
 454    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
 455    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
 456    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
 457    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
 458    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
 459    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
 460    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
 461    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
 462    ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
 463       MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
 464            ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
 465
 466    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
 467    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
 468    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
 469    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
 470    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
 471    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
 472    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
 473    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
 474    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
 475       MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
 476            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
 477
 478    /* Fragment shaders use real, 32-bit twos-complement integers for all
 479     * integer types.
 480     */
 481    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
 482    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
 483    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
 484    ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 485    ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 486
 487    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
 488    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
 489    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
 490    ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 491    ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 492
 493    if (brw->gen >= 7) {
 494       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 495       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 496       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 497       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 498       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
 499       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
 500       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
 501       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
 502       ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
 503    }
 504
 505    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
 506     * but we're not sure how it's actually done for vertex order,
 507     * that affect provoking vertex decision. Always use last vertex
 508     * convention for quad primitive which works as expected for now.
 509     */
 510    if (brw->gen >= 6)
 511       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 512
 513    ctx->Const.NativeIntegers = true;
 514    ctx->Const.VertexID_is_zero_based = true;
 515
 516    /* Regarding the CMP instruction, the Ivybridge PRM says:
 517     *
 518     *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
 519     *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
 520     *    0xFFFFFFFF) is assigned to dst."
 521     *
 522     * but PRMs for earlier generations say
 523     *
 524     *   "In dword format, one GRF may store up to 8 results. When the register
 525     *    is used later as a vector of Booleans, as only LSB at each channel
 526     *    contains meaning [sic] data, software should make sure all higher bits
 527     *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
 528     *
 529     * We select the representation of a true boolean uniform to be ~0, and fix
 530     * the results of Gen <= 5 CMP instruction's with -(result & 1).
 531     */
 532    ctx->Const.UniformBooleanTrue = ~0;
 533
 534    /* From the gen4 PRM, volume 4 page 127:
 535     *
 536     *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
 537     *      the base address of the first element of the surface, computed in
 538     *      software by adding the surface base address to the byte offset of
 539     *      the element in the buffer."
 540     *
 541     * However, unaligned accesses are slower, so enforce buffer alignment.
 542     */
 543    ctx->Const.UniformBufferOffsetAlignment = 16;
 544    ctx->Const.TextureBufferOffsetAlignment = 16;
 545
 546    if (brw->gen >= 6) {
 547       ctx->Const.MaxVarying = 32;
 548       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
 549       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
 550       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
 551       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
 552    }
 553
 554    static const nir_shader_compiler_options nir_options = {
 555       .native_integers = true,
 556       /* In order to help allow for better CSE at the NIR level we tell NIR
 557        * to split all ffma instructions during opt_algebraic and we then
 558        * re-combine them as a later step.
 559        */
 560       .lower_ffma = true,
 561       .lower_sub = true,
 562    };
 563
 564    /* We want the GLSL compiler to emit code that uses condition codes */
 565    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 566       ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
 567       ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
 568       ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
 569       ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
 570       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
 571       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
 572          (i == MESA_SHADER_FRAGMENT);
 573       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
 574          (i == MESA_SHADER_FRAGMENT);
 575       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
 576       ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
 577    }
 578
 579    ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
 580    ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
 581
 582    if (brw->scalar_vs) {
 583       /* If we're using the scalar backend for vertex shaders, we need to
 584        * configure these accordingly.
 585        */
 586       ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
 587       ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
 588       ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
 589
 590       if (brw_env_var_as_boolean("INTEL_USE_NIR", false))
 591          ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options;
 592    }
 593
 594    if (brw_env_var_as_boolean("INTEL_USE_NIR", true))
 595       ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options;
 596
 597    /* ARB_viewport_array */
 598    if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
 599       ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
 600       ctx->Const.ViewportSubpixelBits = 0;
 601
 602       /* Cast to float before negating becuase MaxViewportWidth is unsigned.
 603        */
 604       ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
 605       ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
 606    }
 607
 608    /* ARB_gpu_shader5 */
 609    if (brw->gen >= 7)
 610       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
 611 }
 612
 613 /**
 614  * Process driconf (drirc) options, setting appropriate context flags.
 615  *
 616  * intelInitExtensions still pokes at optionCache directly, in order to
 617  * avoid advertising various extensions.  No flags are set, so it makes
 618  * sense to continue doing that there.
 619  */
 620 static void
 621 brw_process_driconf_options(struct brw_context *brw)
 622 {
 623    struct gl_context *ctx = &brw->ctx;
 624
 625    driOptionCache *options = &brw->optionCache;
 626    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 627                        brw->driContext->driScreenPriv->myNum, "i965");
 628
 629    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 630    switch (bo_reuse_mode) {
 631    case DRI_CONF_BO_REUSE_DISABLED:
 632       break;
 633    case DRI_CONF_BO_REUSE_ALL:
 634       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 635       break;
 636    }
 637
 638    if (!driQueryOptionb(options, "hiz")) {
 639        brw->has_hiz = false;
 640        /* On gen6, you can only do separate stencil with HIZ. */
 641        if (brw->gen == 6)
 642           brw->has_separate_stencil = false;
 643    }
 644
 645    if (driQueryOptionb(options, "always_flush_batch")) {
 646       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 647       brw->always_flush_batch = true;
 648    }
 649
 650    if (driQueryOptionb(options, "always_flush_cache")) {
 651       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 652       brw->always_flush_cache = true;
 653    }
 654
 655    if (driQueryOptionb(options, "disable_throttling")) {
 656       fprintf(stderr, "disabling flush throttling\n");
 657       brw->disable_throttling = true;
 658    }
 659
 660    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 661
 662    ctx->Const.ForceGLSLExtensionsWarn =
 663       driQueryOptionb(options, "force_glsl_extensions_warn");
 664
 665    ctx->Const.DisableGLSLLineContinuations =
 666       driQueryOptionb(options, "disable_glsl_line_continuations");
 667
 668    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 669       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 670 }
 671
 672 /* drop when libdrm 2.4.61 is released */
 673 #ifndef I915_PARAM_REVISION
 674 #define I915_PARAM_REVISION 32
 675 #endif
 676
 677 static int
 678 brw_get_revision(int fd)
 679 {
 680    struct drm_i915_getparam gp;
 681    int revision;
 682    int ret;
 683
 684    memset(&gp, 0, sizeof(gp));
 685    gp.param = I915_PARAM_REVISION;
 686    gp.value = &revision;
 687
 688    ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
 689    if (ret)
 690       revision = -1;
 691
 692    return revision;
 693 }
 694
 695 GLboolean
 696 brwCreateContext(gl_api api,
 697                  const struct gl_config *mesaVis,
 698                  __DRIcontext *driContextPriv,
 699                  unsigned major_version,
 700                  unsigned minor_version,
 701                  uint32_t flags,
 702                  bool notify_reset,
 703                  unsigned *dri_ctx_error,
 704                  void *sharedContextPrivate)
 705 {
 706    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 707    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 708    struct intel_screen *screen = sPriv->driverPrivate;
 709    const struct brw_device_info *devinfo = screen->devinfo;
 710    struct dd_function_table functions;
 711
 712    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 713     * provides us with context reset notifications.
 714     */
 715    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 716       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 717
 718    if (screen->has_context_reset_notification)
 719       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 720
 721    if (flags & ~allowed_flags) {
 722       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 723       return false;
 724    }
 725
 726    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 727    if (!brw) {
 728       fprintf(stderr, "%s: failed to alloc context\n", __func__);
 729       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 730       return false;
 731    }
 732
 733    driContextPriv->driverPrivate = brw;
 734    brw->driContext = driContextPriv;
 735    brw->intelScreen = screen;
 736    brw->bufmgr = screen->bufmgr;
 737
 738    brw->gen = devinfo->gen;
 739    brw->gt = devinfo->gt;
 740    brw->is_g4x = devinfo->is_g4x;
 741    brw->is_baytrail = devinfo->is_baytrail;
 742    brw->is_haswell = devinfo->is_haswell;
 743    brw->is_cherryview = devinfo->is_cherryview;
 744    brw->has_llc = devinfo->has_llc;
 745    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 746    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 747    brw->has_pln = devinfo->has_pln;
 748    brw->has_compr4 = devinfo->has_compr4;
 749    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 750    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 751    brw->needs_unlit_centroid_workaround =
 752       devinfo->needs_unlit_centroid_workaround;
 753    brw->revision = brw_get_revision(sPriv->fd);
 754
 755    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 756    brw->has_swizzling = screen->hw_has_swizzling;
 757
 758    brw->vs.base.stage = MESA_SHADER_VERTEX;
 759    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 760    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 761    if (brw->gen >= 8) {
 762       gen8_init_vtable_surface_functions(brw);
 763       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 764    } else if (brw->gen >= 7) {
 765       gen7_init_vtable_surface_functions(brw);
 766       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 767    } else if (brw->gen >= 6) {
 768       gen6_init_vtable_surface_functions(brw);
 769       brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
 770    } else {
 771       gen4_init_vtable_surface_functions(brw);
 772       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 773    }
 774
 775    brw_init_driver_functions(brw, &functions);
 776
 777    if (notify_reset)
 778       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 779
 780    struct gl_context *ctx = &brw->ctx;
 781
 782    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 783       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 784       fprintf(stderr, "%s: failed to init mesa context\n", __func__);
 785       intelDestroyContext(driContextPriv);
 786       return false;
 787    }
 788
 789    driContextSetFlags(ctx, flags);
 790
 791    /* Initialize the software rasterizer and helper modules.
 792     *
 793     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 794     * software fallbacks (which we have to support on legacy GL to do weird
 795     * glDrawPixels(), glBitmap(), and other functions).
 796     */
 797    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 798       _swrast_CreateContext(ctx);
 799    }
 800
 801    _vbo_CreateContext(ctx);
 802    if (ctx->swrast_context) {
 803       _tnl_CreateContext(ctx);
 804       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 805       _swsetup_CreateContext(ctx);
 806
 807       /* Configure swrast to match hardware characteristics: */
 808       _swrast_allow_pixel_fog(ctx, false);
 809       _swrast_allow_vertex_fog(ctx, true);
 810    }
 811
 812    _mesa_meta_init(ctx);
 813
 814    brw_process_driconf_options(brw);
 815    brw_process_intel_debug_variable(brw);
 816
 817    if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
 818       brw->scalar_vs = true;
 819
 820    brw_initialize_context_constants(brw);
 821
 822    ctx->Const.ResetStrategy = notify_reset
 823       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 824
 825    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 826    _mesa_init_point(ctx);
 827
 828    intel_fbo_init(brw);
 829
 830    intel_batchbuffer_init(brw);
 831
 832    if (brw->gen >= 6) {
 833       /* Create a new hardware context.  Using a hardware context means that
 834        * our GPU state will be saved/restored on context switch, allowing us
 835        * to assume that the GPU is in the same state we left it in.
 836        *
 837        * This is required for transform feedback buffer offsets, query objects,
 838        * and also allows us to reduce how much state we have to emit.
 839        */
 840       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 841
 842       if (!brw->hw_ctx) {
 843          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 844          intelDestroyContext(driContextPriv);
 845          return false;
 846       }
 847    }
 848
 849    brw_init_state(brw);
 850
 851    intelInitExtensions(ctx);
 852
 853    brw_init_surface_formats(brw);
 854
 855    brw->max_vs_threads = devinfo->max_vs_threads;
 856    brw->max_hs_threads = devinfo->max_hs_threads;
 857    brw->max_ds_threads = devinfo->max_ds_threads;
 858    brw->max_gs_threads = devinfo->max_gs_threads;
 859    brw->max_wm_threads = devinfo->max_wm_threads;
 860    brw->urb.size = devinfo->urb.size;
 861    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 862    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 863    brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
 864    brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
 865    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 866
 867    /* Estimate the size of the mappable aperture into the GTT.  There's an
 868     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 869     * It turns out it's basically always 256MB, though some ancient hardware
 870     * was smaller.
 871     */
 872    uint32_t gtt_size = 256 * 1024 * 1024;
 873
 874    /* We don't want to map two objects such that a memcpy between them would
 875     * just fault one mapping in and then the other over and over forever.  So
 876     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 877     * taken up by things like the framebuffer and the ringbuffer and such, so
 878     * be more conservative.
 879     */
 880    brw->max_gtt_map_object_size = gtt_size / 4;
 881
 882    if (brw->gen == 6)
 883       brw->urb.gs_present = false;
 884
 885    brw->prim_restart.in_progress = false;
 886    brw->prim_restart.enable_cut_index = false;
 887    brw->gs.enabled = false;
 888    brw->sf.viewport_transform_enable = true;
 889
 890    ctx->VertexProgram._MaintainTnlProgram = true;
 891    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 892
 893    brw_draw_init( brw );
 894
 895    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 896       /* Turn on some extra GL_ARB_debug_output generation. */
 897       brw->perf_debug = true;
 898    }
 899
 900    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 901       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 902
 903    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 904       brw_init_shader_time(brw);
 905
 906    _mesa_compute_version(ctx);
 907
 908    _mesa_initialize_dispatch_tables(ctx);
 909    _mesa_initialize_vbo_vtxfmt(ctx);
 910
 911    if (ctx->Extensions.AMD_performance_monitor) {
 912       brw_init_performance_monitors(brw);
 913    }
 914
 915    vbo_use_buffer_objects(ctx);
 916    vbo_always_unmap_buffers(ctx);
 917
 918    return true;
 919 }
 920
 921 void
 922 intelDestroyContext(__DRIcontext * driContextPriv)
 923 {
 924    struct brw_context *brw =
 925       (struct brw_context *) driContextPriv->driverPrivate;
 926    struct gl_context *ctx = &brw->ctx;
 927
 928    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 929    if (INTEL_DEBUG & DEBUG_AUB) {
 930       intel_batchbuffer_flush(brw);
 931       aub_dump_bmp(&brw->ctx);
 932    }
 933
 934    _mesa_meta_free(&brw->ctx);
 935    brw_meta_fast_clear_free(brw);
 936
 937    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 938       /* Force a report. */
 939       brw->shader_time.report_time = 0;
 940
 941       brw_collect_and_report_shader_time(brw);
 942       brw_destroy_shader_time(brw);
 943    }
 944
 945    brw_destroy_state(brw);
 946    brw_draw_destroy(brw);
 947
 948    drm_intel_bo_unreference(brw->curbe.curbe_bo);
 949    if (brw->vs.base.scratch_bo)
 950       drm_intel_bo_unreference(brw->vs.base.scratch_bo);
 951    if (brw->gs.base.scratch_bo)
 952       drm_intel_bo_unreference(brw->gs.base.scratch_bo);
 953    if (brw->wm.base.scratch_bo)
 954       drm_intel_bo_unreference(brw->wm.base.scratch_bo);
 955
 956    drm_intel_gem_context_destroy(brw->hw_ctx);
 957
 958    if (ctx->swrast_context) {
 959       _swsetup_DestroyContext(&brw->ctx);
 960       _tnl_DestroyContext(&brw->ctx);
 961    }
 962    _vbo_DestroyContext(&brw->ctx);
 963
 964    if (ctx->swrast_context)
 965       _swrast_DestroyContext(&brw->ctx);
 966
 967    intel_batchbuffer_free(brw);
 968
 969    drm_intel_bo_unreference(brw->throttle_batch[1]);
 970    drm_intel_bo_unreference(brw->throttle_batch[0]);
 971    brw->throttle_batch[1] = NULL;
 972    brw->throttle_batch[0] = NULL;
 973
 974    driDestroyOptionCache(&brw->optionCache);
 975
 976    /* free the Mesa context */
 977    _mesa_free_context_data(&brw->ctx);
 978
 979    ralloc_free(brw);
 980    driContextPriv->driverPrivate = NULL;
 981 }
 982
 983 GLboolean
 984 intelUnbindContext(__DRIcontext * driContextPriv)
 985 {
 986    /* Unset current context and dispath table */
 987    _mesa_make_current(NULL, NULL, NULL);
 988
 989    return true;
 990 }
 991
 992 /**
 993  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 994  * on window system framebuffers.
 995  *
 996  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 997  * your renderbuffer can do sRGB encode, and you can flip a switch that does
 998  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 999  * for a visual where you're guaranteed to be capable, but it turns out that
1000  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1001  * incapable ones, becuase there's no difference between the two in resources
1002  * used.  Applications thus get built that accidentally rely on the default
1003  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
1004  * great...
1005  *
1006  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1007  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1008  * So they removed the enable knob and made it "if the renderbuffer is sRGB
1009  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
1010  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1011  * and get no sRGB encode (assuming that both kinds of visual are available).
1012  * Thus our choice to support sRGB by default on our visuals for desktop would
1013  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1014  *
1015  * Unfortunately, renderbuffer setup happens before a context is created.  So
1016  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1017  * context (without an sRGB visual, though we don't have sRGB visuals exposed
1018  * yet), we go turn that back off before anyone finds out.
1019  */
1020 static void
1021 intel_gles3_srgb_workaround(struct brw_context *brw,
1022                             struct gl_framebuffer *fb)
1023 {
1024    struct gl_context *ctx = &brw->ctx;
1025
1026    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1027       return;
1028
1029    /* Some day when we support the sRGB capable bit on visuals available for
1030     * GLES, we'll need to respect that and not disable things here.
1031     */
1032    fb->Visual.sRGBCapable = false;
1033    for (int i = 0; i < BUFFER_COUNT; i++) {
1034       if (fb->Attachment[i].Renderbuffer &&
1035           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
1036          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
1037       }
1038    }
1039 }
1040
1041 GLboolean
1042 intelMakeCurrent(__DRIcontext * driContextPriv,
1043                  __DRIdrawable * driDrawPriv,
1044                  __DRIdrawable * driReadPriv)
1045 {
1046    struct brw_context *brw;
1047    GET_CURRENT_CONTEXT(curCtx);
1048
1049    if (driContextPriv)
1050       brw = (struct brw_context *) driContextPriv->driverPrivate;
1051    else
1052       brw = NULL;
1053
1054    /* According to the glXMakeCurrent() man page: "Pending commands to
1055     * the previous context, if any, are flushed before it is released."
1056     * But only flush if we're actually changing contexts.
1057     */
1058    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
1059       _mesa_flush(curCtx);
1060    }
1061
1062    if (driContextPriv) {
1063       struct gl_context *ctx = &brw->ctx;
1064       struct gl_framebuffer *fb, *readFb;
1065
1066       if (driDrawPriv == NULL) {
1067          fb = _mesa_get_incomplete_framebuffer();
1068       } else {
1069          fb = driDrawPriv->driverPrivate;
1070          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1071       }
1072
1073       if (driReadPriv == NULL) {
1074          readFb = _mesa_get_incomplete_framebuffer();
1075       } else {
1076          readFb = driReadPriv->driverPrivate;
1077          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1078       }
1079
1080       /* The sRGB workaround changes the renderbuffer's format. We must change
1081        * the format before the renderbuffer's miptree get's allocated, otherwise
1082        * the formats of the renderbuffer and its miptree will differ.
1083        */
1084       intel_gles3_srgb_workaround(brw, fb);
1085       intel_gles3_srgb_workaround(brw, readFb);
1086
1087       /* If the context viewport hasn't been initialized, force a call out to
1088        * the loader to get buffers so we have a drawable size for the initial
1089        * viewport. */
1090       if (!brw->ctx.ViewportInitialized)
1091          intel_prepare_render(brw);
1092
1093       _mesa_make_current(ctx, fb, readFb);
1094    } else {
1095       _mesa_make_current(NULL, NULL, NULL);
1096    }
1097
1098    return true;
1099 }
1100
1101 void
1102 intel_resolve_for_dri2_flush(struct brw_context *brw,
1103                              __DRIdrawable *drawable)
1104 {
1105    if (brw->gen < 6) {
1106       /* MSAA and fast color clear are not supported, so don't waste time
1107        * checking whether a resolve is needed.
1108        */
1109       return;
1110    }
1111
1112    struct gl_framebuffer *fb = drawable->driverPrivate;
1113    struct intel_renderbuffer *rb;
1114
1115    /* Usually, only the back buffer will need to be downsampled. However,
1116     * the front buffer will also need it if the user has rendered into it.
1117     */
1118    static const gl_buffer_index buffers[2] = {
1119          BUFFER_BACK_LEFT,
1120          BUFFER_FRONT_LEFT,
1121    };
1122
1123    for (int i = 0; i < 2; ++i) {
1124       rb = intel_get_renderbuffer(fb, buffers[i]);
1125       if (rb == NULL || rb->mt == NULL)
1126          continue;
1127       if (rb->mt->num_samples <= 1)
1128          intel_miptree_resolve_color(brw, rb->mt);
1129       else
1130          intel_renderbuffer_downsample(brw, rb);
1131    }
1132 }
1133
/**
 * Return the byte size reported by _mesa_get_format_bytes() for the
 * renderbuffer's format, converted to bits.
 */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   const int format_bytes = _mesa_get_format_bytes(intel_rb_format(rb));

   return format_bytes * 8;
}
1139
1140 static void
1141 intel_query_dri2_buffers(struct brw_context *brw,
1142                          __DRIdrawable *drawable,
1143                          __DRIbuffer **buffers,
1144                          int *count);
1145
1146 static void
1147 intel_process_dri2_buffer(struct brw_context *brw,
1148                           __DRIdrawable *drawable,
1149                           __DRIbuffer *buffer,
1150                           struct intel_renderbuffer *rb,
1151                           const char *buffer_name);
1152
1153 static void
1154 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1155
1156 static void
1157 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1158 {
1159    struct gl_framebuffer *fb = drawable->driverPrivate;
1160    struct intel_renderbuffer *rb;
1161    __DRIbuffer *buffers = NULL;
1162    int i, count;
1163    const char *region_name;
1164
1165    /* Set this up front, so that in case our buffers get invalidated
1166     * while we're getting new buffers, we don't clobber the stamp and
1167     * thus ignore the invalidate. */
1168    drawable->lastStamp = drawable->dri2.stamp;
1169
1170    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1171       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1172
1173    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1174
1175    if (buffers == NULL)
1176       return;
1177
1178    for (i = 0; i < count; i++) {
1179        switch (buffers[i].attachment) {
1180        case __DRI_BUFFER_FRONT_LEFT:
1181            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1182            region_name = "dri2 front buffer";
1183            break;
1184
1185        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1186            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1187            region_name = "dri2 fake front buffer";
1188            break;
1189
1190        case __DRI_BUFFER_BACK_LEFT:
1191            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1192            region_name = "dri2 back buffer";
1193            break;
1194
1195        case __DRI_BUFFER_DEPTH:
1196        case __DRI_BUFFER_HIZ:
1197        case __DRI_BUFFER_DEPTH_STENCIL:
1198        case __DRI_BUFFER_STENCIL:
1199        case __DRI_BUFFER_ACCUM:
1200        default:
1201            fprintf(stderr,
1202                    "unhandled buffer attach event, attachment type %d\n",
1203                    buffers[i].attachment);
1204            return;
1205        }
1206
1207        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1208    }
1209
1210 }
1211
1212 void
1213 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1214 {
1215    struct brw_context *brw = context->driverPrivate;
1216    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1217
1218    /* Set this up front, so that in case our buffers get invalidated
1219     * while we're getting new buffers, we don't clobber the stamp and
1220     * thus ignore the invalidate. */
1221    drawable->lastStamp = drawable->dri2.stamp;
1222
1223    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1224       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1225
1226    if (screen->image.loader)
1227       intel_update_image_buffers(brw, drawable);
1228    else
1229       intel_update_dri2_buffers(brw, drawable);
1230
1231    driUpdateFramebufferSize(&brw->ctx, drawable);
1232 }
1233
1234 /**
1235  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1236  * state is required.
1237  */
1238 void
1239 intel_prepare_render(struct brw_context *brw)
1240 {
1241    struct gl_context *ctx = &brw->ctx;
1242    __DRIcontext *driContext = brw->driContext;
1243    __DRIdrawable *drawable;
1244
1245    drawable = driContext->driDrawablePriv;
1246    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1247       if (drawable->lastStamp != drawable->dri2.stamp)
1248          intel_update_renderbuffers(driContext, drawable);
1249       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1250    }
1251
1252    drawable = driContext->driReadablePriv;
1253    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1254       if (drawable->lastStamp != drawable->dri2.stamp)
1255          intel_update_renderbuffers(driContext, drawable);
1256       driContext->dri2.read_stamp = drawable->dri2.stamp;
1257    }
1258
1259    /* If we're currently rendering to the front buffer, the rendering
1260     * that will happen next will probably dirty the front buffer.  So
1261     * mark it as dirty here.
1262     */
1263    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
1264       brw->front_buffer_dirty = true;
1265 }
1266
1267 /**
1268  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1269  *
1270  * To determine which DRI buffers to request, examine the renderbuffers
1271  * attached to the drawable's framebuffer. Then request the buffers with
1272  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1273  *
1274  * This is called from intel_update_renderbuffers().
1275  *
1276  * \param drawable      Drawable whose buffers are queried.
1277  * \param buffers       [out] List of buffers returned by DRI2 query.
1278  * \param buffer_count  [out] Number of buffers returned.
1279  *
1280  * \see intel_update_renderbuffers()
1281  * \see DRI2GetBuffers()
1282  * \see DRI2GetBuffersWithFormat()
1283  */
1284 static void
1285 intel_query_dri2_buffers(struct brw_context *brw,
1286                          __DRIdrawable *drawable,
1287                          __DRIbuffer **buffers,
1288                          int *buffer_count)
1289 {
1290    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1291    struct gl_framebuffer *fb = drawable->driverPrivate;
1292    int i = 0;
1293    unsigned attachments[8];
1294
1295    struct intel_renderbuffer *front_rb;
1296    struct intel_renderbuffer *back_rb;
1297
1298    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1299    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1300
1301    memset(attachments, 0, sizeof(attachments));
1302    if ((brw_is_front_buffer_drawing(fb) ||
1303         brw_is_front_buffer_reading(fb) ||
1304         !back_rb) && front_rb) {
1305       /* If a fake front buffer is in use, then querying for
1306        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1307        * the real front buffer to the fake front buffer.  So before doing the
1308        * query, we need to make sure all the pending drawing has landed in the
1309        * real front buffer.
1310        */
1311       intel_batchbuffer_flush(brw);
1312       intel_flush_front(&brw->ctx);
1313
1314       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1315       attachments[i++] = intel_bits_per_pixel(front_rb);
1316    } else if (front_rb && brw->front_buffer_dirty) {
1317       /* We have pending front buffer rendering, but we aren't querying for a
1318        * front buffer.  If the front buffer we have is a fake front buffer,
1319        * the X server is going to throw it away when it processes the query.
1320        * So before doing the query, make sure all the pending drawing has
1321        * landed in the real front buffer.
1322        */
1323       intel_batchbuffer_flush(brw);
1324       intel_flush_front(&brw->ctx);
1325    }
1326
1327    if (back_rb) {
1328       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1329       attachments[i++] = intel_bits_per_pixel(back_rb);
1330    }
1331
1332    assert(i <= ARRAY_SIZE(attachments));
1333
1334    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1335                                                         &drawable->w,
1336                                                         &drawable->h,
1337                                                         attachments, i / 2,
1338                                                         buffer_count,
1339                                                         drawable->loaderPrivate);
1340 }
1341
1342 /**
1343  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1344  *
1345  * This is called from intel_update_renderbuffers().
1346  *
1347  * \par Note:
1348  *    DRI buffers whose attachment point is DRI2BufferStencil or
1349  *    DRI2BufferDepthStencil are handled as special cases.
1350  *
1351  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1352  *        that is passed to drm_intel_bo_gem_create_from_name().
1353  *
1354  * \see intel_update_renderbuffers()
1355  */
1356 static void
1357 intel_process_dri2_buffer(struct brw_context *brw,
1358                           __DRIdrawable *drawable,
1359                           __DRIbuffer *buffer,
1360                           struct intel_renderbuffer *rb,
1361                           const char *buffer_name)
1362 {
1363    struct gl_framebuffer *fb = drawable->driverPrivate;
1364    drm_intel_bo *bo;
1365
1366    if (!rb)
1367       return;
1368
1369    unsigned num_samples = rb->Base.Base.NumSamples;
1370
1371    /* We try to avoid closing and reopening the same BO name, because the first
1372     * use of a mapping of the buffer involves a bunch of page faulting which is
1373     * moderately expensive.
1374     */
1375    struct intel_mipmap_tree *last_mt;
1376    if (num_samples == 0)
1377       last_mt = rb->mt;
1378    else
1379       last_mt = rb->singlesample_mt;
1380
1381    uint32_t old_name = 0;
1382    if (last_mt) {
1383        /* The bo already has a name because the miptree was created by a
1384         * previous call to intel_process_dri2_buffer(). If a bo already has a
1385         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1386         * create a new name.
1387         */
1388       drm_intel_bo_flink(last_mt->bo, &old_name);
1389    }
1390
1391    if (old_name == buffer->name)
1392       return;
1393
1394    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1395       fprintf(stderr,
1396               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1397               buffer->name, buffer->attachment,
1398               buffer->cpp, buffer->pitch);
1399    }
1400
1401    intel_miptree_release(&rb->mt);
1402    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1403                                           buffer->name);
1404    if (!bo) {
1405       fprintf(stderr,
1406               "Failed to open BO for returned DRI2 buffer "
1407               "(%dx%d, %s, named %d).\n"
1408               "This is likely a bug in the X Server that will lead to a "
1409               "crash soon.\n",
1410               drawable->w, drawable->h, buffer_name, buffer->name);
1411       return;
1412    }
1413
1414    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1415                                             drawable->w, drawable->h,
1416                                             buffer->pitch);
1417
1418    if (brw_is_front_buffer_drawing(fb) &&
1419        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1420         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1421        rb->Base.Base.NumSamples > 1) {
1422       intel_renderbuffer_upsample(brw, rb);
1423    }
1424
1425    assert(rb->mt);
1426
1427    drm_intel_bo_unreference(bo);
1428 }
1429
1430 /**
1431  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1432  *
1433  * To determine which DRI buffers to request, examine the renderbuffers
1434  * attached to the drawable's framebuffer. Then request the buffers from
1435  * the image loader
1436  *
1437  * This is called from intel_update_renderbuffers().
1438  *
1439  * \param drawable      Drawable whose buffers are queried.
1440  * \param buffers       [out] List of buffers returned by DRI2 query.
1441  * \param buffer_count  [out] Number of buffers returned.
1442  *
1443  * \see intel_update_renderbuffers()
1444  */
1445
1446 static void
1447 intel_update_image_buffer(struct brw_context *intel,
1448                           __DRIdrawable *drawable,
1449                           struct intel_renderbuffer *rb,
1450                           __DRIimage *buffer,
1451                           enum __DRIimageBufferMask buffer_type)
1452 {
1453    struct gl_framebuffer *fb = drawable->driverPrivate;
1454
1455    if (!rb || !buffer->bo)
1456       return;
1457
1458    unsigned num_samples = rb->Base.Base.NumSamples;
1459
1460    /* Check and see if we're already bound to the right
1461     * buffer object
1462     */
1463    struct intel_mipmap_tree *last_mt;
1464    if (num_samples == 0)
1465       last_mt = rb->mt;
1466    else
1467       last_mt = rb->singlesample_mt;
1468
1469    if (last_mt && last_mt->bo == buffer->bo)
1470       return;
1471
1472    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1473                                             buffer->width, buffer->height,
1474                                             buffer->pitch);
1475
1476    if (brw_is_front_buffer_drawing(fb) &&
1477        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1478        rb->Base.Base.NumSamples > 1) {
1479       intel_renderbuffer_upsample(intel, rb);
1480    }
1481 }
1482
1483 static void
1484 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1485 {
1486    struct gl_framebuffer *fb = drawable->driverPrivate;
1487    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1488    struct intel_renderbuffer *front_rb;
1489    struct intel_renderbuffer *back_rb;
1490    struct __DRIimageList images;
1491    unsigned int format;
1492    uint32_t buffer_mask = 0;
1493
1494    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1495    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1496
1497    if (back_rb)
1498       format = intel_rb_format(back_rb);
1499    else if (front_rb)
1500       format = intel_rb_format(front_rb);
1501    else
1502       return;
1503
1504    if (front_rb && (brw_is_front_buffer_drawing(fb) ||
1505                     brw_is_front_buffer_reading(fb) || !back_rb)) {
1506       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1507    }
1508
1509    if (back_rb)
1510       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1511
1512    (*screen->image.loader->getBuffers) (drawable,
1513                                         driGLFormatToImageFormat(format),
1514                                         &drawable->dri2.stamp,
1515                                         drawable->loaderPrivate,
1516                                         buffer_mask,
1517                                         &images);
1518
1519    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1520       drawable->w = images.front->width;
1521       drawable->h = images.front->height;
1522       intel_update_image_buffer(brw,
1523                                 drawable,
1524                                 front_rb,
1525                                 images.front,
1526                                 __DRI_IMAGE_BUFFER_FRONT);
1527    }
1528    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1529       drawable->w = images.back->width;
1530       drawable->h = images.back->height;
1531       intel_update_image_buffer(brw,
1532                                 drawable,
1533                                 back_rb,
1534                                 images.back,
1535                                 __DRI_IMAGE_BUFFER_BACK);
1536    }
1537 }