/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

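/* Driver hook for ctx->Driver.QuerySamplesForFormat, which backs the
 * GL_ARB_internalformat_query GL_SAMPLES request: fill samples[] with the
 * MSAA modes supported on this generation, in descending order, and return
 * the number of entries written.
 */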
static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 9:
      samples[0] = 16;
      samples[1] = 8;
      samples[2] = 4;
      samples[3] = 2;
      return 4;

   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(brw->gen < 6);
      samples[0] = 1;
      return 1;
   }
}

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples. */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->intelScreen,
                                 fb->DefaultGeometry.NumSamples);
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve the depth and color buffers of each enabled texture, and flush
    * the render cache if the texture was recently rendered to.
    */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      intel_miptree_resolve_color(brw, tex_obj->mt);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;

      if (unlikely(shader && shader->NumImages)) {
         for (unsigned j = 0; j < shader->NumImages; j++) {
            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               intel_miptree_resolve_color(brw, tex_obj->mt);
               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
    * single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats.  This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with the
    * linear equivalent format anyway.
    */
   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
      struct gl_framebuffer *fb = ctx->DrawBuffer;
      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (rb == NULL)
            continue;

         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         struct intel_mipmap_tree *mt = irb->mt;

         if (mt == NULL ||
             mt->num_samples > 1 ||
             _mesa_get_srgb_format_linear(mt->format) == mt->format)
            continue;

         intel_miptree_resolve_color(brw, mt);
         brw_render_cache_set_check_flush(brw, mt->bo);
      }
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

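/* Pick whichever loader interface this screen was initialized with; the
 * image loader and the DRI2 loader both expose an equivalent
 * flushFrontBuffer hook.
 */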
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer.  It
          * is unnecessary to resolve the back, but harms nothing except
          * performance.  And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs(functions);
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (ctx->API == API_OPENGL_CORE &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
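   /* As a worked example: assuming BRW_MAX_SOL_BINDINGS == 64 and
    * BRW_MAX_SOL_BUFFERS == 4 (their values in brw_context.h at the time of
    * writing), this advertises 64 interleaved components, or 64 / 4 = 16
    * components per buffer in "separate components" mode.
    */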

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation.  For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number.  Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking-vertex decision.  Always use the last-vertex
    * convention for quad primitives; it works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results.  When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;
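   /* Worked example of the Gen <= 5 fixup: CMP leaves only the LSB of each
    * channel meaningful, so a "true" result of 0x00000001 becomes
    * -(0x00000001 & 1) == 0xFFFFFFFF == ~0, matching UniformBooleanTrue,
    * while a "false" result of 0 stays 0.
    */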

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs).  With UBOs, the GPU never
    * writes, so there's no problem.  For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true.  This allows us to run more test cases, and is
    * required to meet desktop GL's minimum compute shader limits.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
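   /* As a worked example: a hypothetical part with max_threads == 64 would
    * advertise 16 * 64 == 1024 invocations per work group in GL, but only
    * 8 * 64 == 512 in ES.
    */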

   const uint32_t max_invocations = simd_size * max_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HiZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;
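   /* With the 256MB aperture estimate above, this caps directly mappable
    * objects at 64MB.
    */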

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init(brw);

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset the current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }
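      /* The stamps set above are one less than the drawables' current
       * stamps, so the next intel_prepare_render() will see the buffers as
       * out of date and fetch fresh ones.
       */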

      /* The sRGB workaround changes the renderbuffer's format.  We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport.
       */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled.  However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }
}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate.
    */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer.  So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer.  If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

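   /* attachments[] was filled with (attachment, bits-per-pixel) pairs, which
    * is why the count passed to the loader below is i / 2.
    */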
   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable  Drawable whose buffers are queried.
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}