/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

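/**
 * Driver hook behind glGetInternalformativ()'s GL_SAMPLES-style queries:
 * writes the MSAA sample counts supported by this generation into
 * samples[], highest first, and returns the number of entries written.
 */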
static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 9:
      samples[0] = 16;
      samples[1] = 8;
      samples[2] = 4;
      samples[3] = 2;
      return 4;

   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(brw->gen < 6);
      samples[0] = 1;
      return 1;
   }
}

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

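   /* X-macro trick: i965_pci_ids.h expands CHIPSET(id, symbol, str) once
    * per known PCI ID, generating all the case labels of this switch.
    */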
   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      dri2InvalidateDrawable(driContext->driDrawablePriv);
      dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      intel_miptree_resolve_color(brw, tex_obj->mt);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   _mesa_lock_context_textures(ctx);
}

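/* Both loader interfaces provide an equivalent flushFrontBuffer hook; pick
 * whichever one the screen was initialized with.
 */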
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

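   /* Remember that the client issued an explicit flush, so batch submission
    * can apply flush-based throttling (unless the disable_throttling driconf
    * option, handled below, is set).
    */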
   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxUniformBlockSize = 65536;
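   /* Each UBO binding can therefore contribute 65536 / 4 = 16384 float
    * components to the combined limit computed below.
    */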
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];
      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
   }

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
   if (brw->gen >= 6)
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
   else
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
   if (_mesa_extension_override_enables.ARB_compute_shader) {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
      ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO;
   } else {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
   }
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxTextureMbytes = 1536;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
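   /* Assuming the usual definitions in brw_context.h (64 SOL bindings across
    * 4 SOL buffers; see that header for the authoritative values), the
    * division above works out to 16 separate components per buffer.
    */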

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

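   /* msaa_modes is a zero-terminated list in decreasing sample-count order,
    * which is why msaa_modes[0] is the unclamped maximum and why the early
    * break below picks the largest mode that satisfies the clamp.
    */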
   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
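   /* The RangeMin/RangeMax values below use glGetShaderPrecisionFormat()'s
    * log2 encoding: 31 and 30 describe the range [-2^31, 2^31 - 1], and a
    * Precision of 0 means integer results are exact.
    */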
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   if (brw->gen >= 7) {
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;

      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
         BRW_MAX_IMAGES;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
         (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
         BRW_MAX_IMAGES;
      ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
      ctx->Const.MaxCombinedShaderOutputResources =
         MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
      ctx->Const.MaxImageSamples = 0;
      ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES;
   }

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking vertex decision. Always use the last-vertex
    * convention for quad primitives; it works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   /* FIXME: Tessellation stages are not yet supported in i965, so
    * MaxCombinedShaderStorageBlocks doesn't take them into account.
    */
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO;
   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO;
   ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
   ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO;
   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO;
   ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3;
   ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3;

   if (_mesa_extension_override_enables.ARB_compute_shader)
      ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_adjust_cs_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true. This allows us to run more test cases, and will be
    * required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

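   /* The advertised work-group limits scale with the number of invocations
    * a single dispatched program can cover: hardware threads times SIMD
    * width.
    */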
   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions. No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
                          | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state. It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context. Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   brw_adjust_cs_context_constants(brw);

   /* Estimate the size of the mappable aperture into the GTT. There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever. So
    * we would need to divide the GTT size by 2. Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       brw_env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES 2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it. You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used. Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode". Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created. So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/draw
 * buffer state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer. So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];
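   /* attachments[] is filled with (attachment token, bits-per-pixel) pairs,
    * which is why the request at the end of this function passes i / 2 as
    * the attachment count.
    */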

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer. So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer. If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer(). If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter. It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}