src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43 #include "main/texobj.h"
  44 #include "main/framebuffer.h"
  45
  46 #include "vbo/vbo_context.h"
  47
  48 #include "drivers/common/driverfuncs.h"
  49 #include "drivers/common/meta.h"
  50 #include "utils.h"
  51
  52 #include "brw_context.h"
  53 #include "brw_defines.h"
  54 #include "brw_compiler.h"
  55 #include "brw_draw.h"
  56 #include "brw_state.h"
  57
  58 #include "intel_batchbuffer.h"
  59 #include "intel_buffer_objects.h"
  60 #include "intel_buffers.h"
  61 #include "intel_fbo.h"
  62 #include "intel_mipmap_tree.h"
  63 #include "intel_pixel.h"
  64 #include "intel_image.h"
  65 #include "intel_tex.h"
  66 #include "intel_tex_obj.h"
  67
  68 #include "swrast_setup/swrast_setup.h"
  69 #include "tnl/tnl.h"
  70 #include "tnl/t_pipeline.h"
  71 #include "util/ralloc.h"
  72 #include "util/debug.h"
  73
  74 /***************************************
  75  * Mesa's Driver Functions
  76  ***************************************/
  77
  78 const char *const brw_vendor_string = "Intel Open Source Technology Center";
  79
  80 const char *
  81 brw_get_renderer_string(unsigned deviceID)
  82 {
  83    const char *chipset;
  84    static char buffer[128];
  85
  86    switch (deviceID) {
  87 #undef CHIPSET
  88 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
  89 #include "pci_ids/i965_pci_ids.h"
  90    default:
  91       chipset = "Unknown Intel Chipset";
  92       break;
  93    }
  94
  95    (void) driGetRendererString(buffer, chipset, 0);
  96    return buffer;
  97 }
  98
  99 static const GLubyte *
 100 intel_get_string(struct gl_context * ctx, GLenum name)
 101 {
 102    const struct brw_context *const brw = brw_context(ctx);
 103
 104    switch (name) {
 105    case GL_VENDOR:
 106       return (GLubyte *) brw_vendor_string;
 107
 108    case GL_RENDERER:
 109       return
 110          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 111
 112    default:
 113       return NULL;
 114    }
 115 }
 116
 117 static void
 118 intel_viewport(struct gl_context *ctx)
 119 {
 120    struct brw_context *brw = brw_context(ctx);
 121    __DRIcontext *driContext = brw->driContext;
 122
 123    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 124       if (driContext->driDrawablePriv)
 125          dri2InvalidateDrawable(driContext->driDrawablePriv);
 126       if (driContext->driReadablePriv)
 127          dri2InvalidateDrawable(driContext->driReadablePriv);
 128    }
 129 }
 130
 131 static void
 132 intel_update_framebuffer(struct gl_context *ctx,
 133                          struct gl_framebuffer *fb)
 134 {
 135    struct brw_context *brw = brw_context(ctx);
 136
 137    /* Quantize the derived default number of samples
 138     */
 139    fb->DefaultGeometry._NumSamples =
 140       intel_quantize_num_samples(brw->intelScreen,
 141                                  fb->DefaultGeometry.NumSamples);
 142 }
 143
 144 static void
 145 intel_update_state(struct gl_context * ctx, GLuint new_state)
 146 {
 147    struct brw_context *brw = brw_context(ctx);
 148    struct intel_texture_object *tex_obj;
 149    struct intel_renderbuffer *depth_irb;
 150
 151    if (ctx->swrast_context)
 152       _swrast_InvalidateState(ctx, new_state);
 153    _vbo_InvalidateState(ctx, new_state);
 154
 155    brw->NewGLState |= new_state;
 156
 157    _mesa_unlock_context_textures(ctx);
 158
 159    /* Resolve the depth buffer's HiZ buffer. */
 160    depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
 161    if (depth_irb)
 162       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 163
 164    /* Resolve depth buffer and render cache of each enabled texture. */
 165    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
 166    for (int i = 0; i <= maxEnabledUnit; i++) {
 167       if (!ctx->Texture.Unit[i]._Current)
 168          continue;
 169       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
 170       if (!tex_obj || !tex_obj->mt)
 171          continue;
 172       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
 173       /* Sampling engine understands lossless compression and resolving
 174        * those surfaces should be skipped for performance reasons.
 175        */
 176       intel_miptree_resolve_color(brw, tex_obj->mt,
 177                                   INTEL_MIPTREE_IGNORE_CCS_E);
 178       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 179    }
 180
 181    /* Resolve color for each active shader image. */
 182    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 183       const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
 184          ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;
 185
 186       if (unlikely(shader && shader->NumImages)) {
 187          for (unsigned j = 0; j < shader->NumImages; j++) {
 188             struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
 189             tex_obj = intel_texture_object(u->TexObj);
 190
 191             if (tex_obj && tex_obj->mt) {
 192                /* Access to images is implemented using indirect messages
 193                 * against data port. Normal render target write understands
 194                 * lossless compression but unfortunately the typed/untyped
 195                 * read/write interface doesn't. Therefore the compressed
 196                 * surfaces need to be resolved prior to accessing them.
 197                 */
 198                intel_miptree_resolve_color(brw, tex_obj->mt, 0);
 199                brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 200             }
 201          }
 202       }
 203    }
 204
 205    /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
 206     * single-sampled color renderbuffers because the CCS buffer isn't
 207     * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
 208     * enabled because otherwise the surface state will be programmed with the
 209     * linear equivalent format anyway.
 210     */
 211    if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
 212       struct gl_framebuffer *fb = ctx->DrawBuffer;
 213       for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
 214          struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
 215
 216          if (rb == NULL)
 217             continue;
 218
 219          struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 220          struct intel_mipmap_tree *mt = irb->mt;
 221
 222          if (mt == NULL ||
 223              mt->num_samples > 1 ||
 224              _mesa_get_srgb_format_linear(mt->format) == mt->format)
 225                continue;
 226
 227          /* Lossless compression is not supported for SRGB formats, it
 228           * should be impossible to get here with such surfaces.
 229           */
 230          assert(!intel_miptree_is_lossless_compressed(brw, mt));
 231          intel_miptree_resolve_color(brw, mt, 0);
 232          brw_render_cache_set_check_flush(brw, mt->bo);
 233       }
 234    }
 235
 236    _mesa_lock_context_textures(ctx);
 237
 238    if (new_state & _NEW_BUFFERS) {
 239       intel_update_framebuffer(ctx, ctx->DrawBuffer);
 240       if (ctx->DrawBuffer != ctx->ReadBuffer)
 241          intel_update_framebuffer(ctx, ctx->ReadBuffer);
 242    }
 243 }
 244
 245 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 246
 247 static void
 248 intel_flush_front(struct gl_context *ctx)
 249 {
 250    struct brw_context *brw = brw_context(ctx);
 251    __DRIcontext *driContext = brw->driContext;
 252    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 253    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 254
 255    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 256       if (flushFront(screen) && driDrawable &&
 257           driDrawable->loaderPrivate) {
 258
 259          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 260           *
 261           * This potentially resolves both front and back buffer. It
 262           * is unnecessary to resolve the back, but harms nothing except
 263           * performance. And no one cares about front-buffer render
 264           * performance.
 265           */
 266          intel_resolve_for_dri2_flush(brw, driDrawable);
 267          intel_batchbuffer_flush(brw);
 268
 269          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 270
 271          /* We set the dirty bit in intel_prepare_render() if we're
 272           * front buffer rendering once we get there.
 273           */
 274          brw->front_buffer_dirty = false;
 275       }
 276    }
 277 }
 278
 279 static void
 280 intel_glFlush(struct gl_context *ctx)
 281 {
 282    struct brw_context *brw = brw_context(ctx);
 283
 284    intel_batchbuffer_flush(brw);
 285    intel_flush_front(ctx);
 286
 287    brw->need_flush_throttle = true;
 288 }
 289
 290 static void
 291 intel_finish(struct gl_context * ctx)
 292 {
 293    struct brw_context *brw = brw_context(ctx);
 294
 295    intel_glFlush(ctx);
 296
 297    if (brw->batch.last_bo)
 298       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 299 }
 300
 301 static void
 302 brw_init_driver_functions(struct brw_context *brw,
 303                           struct dd_function_table *functions)
 304 {
 305    _mesa_init_driver_functions(functions);
 306
 307    /* GLX uses DRI2 invalidate events to handle window resizing.
 308     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 309     * which doesn't provide a mechanism for snooping the event queues.
 310     *
 311     * So EGL still relies on viewport hacks to handle window resizing.
 312     * This should go away with DRI3000.
 313     */
 314    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 315       functions->Viewport = intel_viewport;
 316
 317    functions->Flush = intel_glFlush;
 318    functions->Finish = intel_finish;
 319    functions->GetString = intel_get_string;
 320    functions->UpdateState = intel_update_state;
 321
 322    intelInitTextureFuncs(functions);
 323    intelInitTextureImageFuncs(functions);
 324    intelInitTextureSubImageFuncs(functions);
 325    intelInitTextureCopyImageFuncs(functions);
 326    intelInitCopyImageFuncs(functions);
 327    intelInitClearFuncs(functions);
 328    intelInitBufferFuncs(functions);
 329    intelInitPixelFuncs(functions);
 330    intelInitBufferObjectFuncs(functions);
 331    intel_init_syncobj_functions(functions);
 332    brw_init_object_purgeable_functions(functions);
 333
 334    brwInitFragProgFuncs( functions );
 335    brw_init_common_queryobj_functions(functions);
 336    if (brw->gen >= 6)
 337       gen6_init_queryobj_functions(functions);
 338    else
 339       gen4_init_queryobj_functions(functions);
 340    brw_init_compute_functions(functions);
 341    if (brw->gen >= 7)
 342       brw_init_conditional_render_functions(functions);
 343
 344    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 345    functions->QueryInternalFormat = brw_query_internal_format;
 346
 347    functions->NewTransformFeedback = brw_new_transform_feedback;
 348    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 349    functions->GetTransformFeedbackVertexCount =
 350       brw_get_transform_feedback_vertex_count;
 351    if (brw->gen >= 7) {
 352       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 353       functions->EndTransformFeedback = gen7_end_transform_feedback;
 354       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 355       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 356    } else {
 357       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 358       functions->EndTransformFeedback = brw_end_transform_feedback;
 359    }
 360
 361    if (brw->gen >= 6)
 362       functions->GetSamplePosition = gen6_get_sample_position;
 363 }
 364
 365 static void
 366 brw_initialize_context_constants(struct brw_context *brw)
 367 {
 368    struct gl_context *ctx = &brw->ctx;
 369    const struct brw_compiler *compiler = brw->intelScreen->compiler;
 370
 371    const bool stage_exists[MESA_SHADER_STAGES] = {
 372       [MESA_SHADER_VERTEX] = true,
 373       [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
 374       [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
 375       [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
 376       [MESA_SHADER_FRAGMENT] = true,
 377       [MESA_SHADER_COMPUTE] =
 378          (ctx->API == API_OPENGL_CORE &&
 379           ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
 380          (ctx->API == API_OPENGLES2 &&
 381           ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
 382          _mesa_extension_override_enables.ARB_compute_shader,
 383    };
 384
 385    unsigned num_stages = 0;
 386    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 387       if (stage_exists[i])
 388          num_stages++;
 389    }
 390
 391    unsigned max_samplers =
 392       brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
 393
 394    ctx->Const.MaxDualSourceDrawBuffers = 1;
 395    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 396    ctx->Const.MaxCombinedShaderOutputResources =
 397       MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
 398
 399    ctx->Const.QueryCounterBits.Timestamp = 36;
 400
 401    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 402    ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
 403    ctx->Const.MaxRenderbufferSize = 8192;
 404    ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
 405    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
 406    ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
 407    ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
 408    ctx->Const.MaxTextureMbytes = 1536;
 409    ctx->Const.MaxTextureRectSize = 1 << 12;
 410    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 411    ctx->Const.StripTextureBorder = true;
 412    if (brw->gen >= 7)
 413       ctx->Const.MaxProgramTextureGatherComponents = 4;
 414    else if (brw->gen == 6)
 415       ctx->Const.MaxProgramTextureGatherComponents = 1;
 416
 417    ctx->Const.MaxUniformBlockSize = 65536;
 418
 419    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 420       struct gl_program_constants *prog = &ctx->Const.Program[i];
 421
 422       if (!stage_exists[i])
 423          continue;
 424
 425       prog->MaxTextureImageUnits = max_samplers;
 426
 427       prog->MaxUniformBlocks = BRW_MAX_UBO;
 428       prog->MaxCombinedUniformComponents =
 429          prog->MaxUniformComponents +
 430          ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
 431
 432       prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 433       prog->MaxAtomicBuffers = BRW_MAX_ABO;
 434       prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
 435       prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
 436    }
 437
 438    ctx->Const.MaxTextureUnits =
 439       MIN2(ctx->Const.MaxTextureCoordUnits,
 440            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 441
 442    ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
 443    ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
 444    ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
 445    ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
 446    ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
 447    ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
 448    ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
 449
 450
 451    /* Hardware only supports a limited number of transform feedback buffers.
 452     * So we need to override the Mesa default (which is based only on software
 453     * limits).
 454     */
 455    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 456
 457    /* On Gen6, in the worst case, we use up one binding table entry per
 458     * transform feedback component (see comments above the definition of
 459     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
 460     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
 461     * BRW_MAX_SOL_BINDINGS.
 462     *
 463     * In "separate components" mode, we need to divide this value by
 464     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
 465     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
 466     */
 467    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
 468    ctx->Const.MaxTransformFeedbackSeparateComponents =
 469       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 470
 471    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
 472
 473    int max_samples;
 474    const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
 475    const int clamp_max_samples =
 476       driQueryOptioni(&brw->optionCache, "clamp_max_samples");
 477
 478    if (clamp_max_samples < 0) {
 479       max_samples = msaa_modes[0];
 480    } else {
 481       /* Select the largest supported MSAA mode that does not exceed
 482        * clamp_max_samples.
 483        */
 484       max_samples = 0;
 485       for (int i = 0; msaa_modes[i] != 0; ++i) {
 486          if (msaa_modes[i] <= clamp_max_samples) {
 487             max_samples = msaa_modes[i];
 488             break;
 489          }
 490       }
 491    }
 492
 493    ctx->Const.MaxSamples = max_samples;
 494    ctx->Const.MaxColorTextureSamples = max_samples;
 495    ctx->Const.MaxDepthTextureSamples = max_samples;
 496    ctx->Const.MaxIntegerSamples = max_samples;
 497    ctx->Const.MaxImageSamples = 0;
 498
 499    /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
 500     * to map indices of rectangular grid to sample numbers within a pixel.
 501     * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
 502     * extension implementation. For more details see the comment above
 503     * gen6_set_sample_maps() definition.
 504     */
 505    gen6_set_sample_maps(ctx);
 506
 507    ctx->Const.MinLineWidth = 1.0;
 508    ctx->Const.MinLineWidthAA = 1.0;
 509    if (brw->gen >= 6) {
 510       ctx->Const.MaxLineWidth = 7.375;
 511       ctx->Const.MaxLineWidthAA = 7.375;
 512       ctx->Const.LineWidthGranularity = 0.125;
 513    } else {
 514       ctx->Const.MaxLineWidth = 7.0;
 515       ctx->Const.MaxLineWidthAA = 7.0;
 516       ctx->Const.LineWidthGranularity = 0.5;
 517    }
 518
 519    /* For non-antialiased lines, we have to round the line width to the
 520     * nearest whole number. Make sure that we don't advertise a line
 521     * width that, when rounded, will be beyond the actual hardware
 522     * maximum.
 523     */
 524    assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
 525
 526    ctx->Const.MinPointSize = 1.0;
 527    ctx->Const.MinPointSizeAA = 1.0;
 528    ctx->Const.MaxPointSize = 255.0;
 529    ctx->Const.MaxPointSizeAA = 255.0;
 530    ctx->Const.PointSizeGranularity = 1.0;
 531
 532    if (brw->gen >= 5 || brw->is_g4x)
 533       ctx->Const.MaxClipPlanes = 8;
 534
 535    ctx->Const.LowerTessLevel = true;
 536
 537    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
 538    ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
 539    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
 540    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
 541    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
 542    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
 543    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
 544    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
 545    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
 546    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
 547    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
 548    ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
 549       MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
 550            ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
 551
 552    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
 553    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
 554    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
 555    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
 556    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
 557    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
 558    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
 559    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
 560    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
 561       MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
 562            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
 563
 564    /* Fragment shaders use real, 32-bit twos-complement integers for all
 565     * integer types.
 566     */
 567    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
 568    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
 569    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
 570    ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 571    ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 572
 573    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
 574    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
 575    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
 576    ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 577    ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 578
 579    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
 580     * but we're not sure how it's actually done for vertex order,
 581     * that affect provoking vertex decision. Always use last vertex
 582     * convention for quad primitive which works as expected for now.
 583     */
 584    if (brw->gen >= 6)
 585       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 586
 587    ctx->Const.NativeIntegers = true;
 588    ctx->Const.VertexID_is_zero_based = true;
 589
 590    /* Regarding the CMP instruction, the Ivybridge PRM says:
 591     *
 592     *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
 593     *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
 594     *    0xFFFFFFFF) is assigned to dst."
 595     *
 596     * but PRMs for earlier generations say
 597     *
 598     *   "In dword format, one GRF may store up to 8 results. When the register
 599     *    is used later as a vector of Booleans, as only LSB at each channel
 600     *    contains meaning [sic] data, software should make sure all higher bits
 601     *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
 602     *
 603     * We select the representation of a true boolean uniform to be ~0, and fix
 604     * the results of Gen <= 5 CMP instruction's with -(result & 1).
 605     */
 606    ctx->Const.UniformBooleanTrue = ~0;
 607
 608    /* From the gen4 PRM, volume 4 page 127:
 609     *
 610     *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
 611     *      the base address of the first element of the surface, computed in
 612     *      software by adding the surface base address to the byte offset of
 613     *      the element in the buffer."
 614     *
 615     * However, unaligned accesses are slower, so enforce buffer alignment.
 616     */
 617    ctx->Const.UniformBufferOffsetAlignment = 16;
 618
 619    /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
 620     * that we can safely have the CPU and GPU writing the same SSBO on
 621     * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
 622     * writes, so there's no problem. For an SSBO, the GPU and the CPU can
 623     * be updating disjoint regions of the buffer simultaneously and that will
 624     * break if the regions overlap the same cacheline.
 625     */
 626    ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
 627    ctx->Const.TextureBufferOffsetAlignment = 16;
 628    ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 629
 630    if (brw->gen >= 6) {
 631       ctx->Const.MaxVarying = 32;
 632       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
 633       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
 634       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
 635       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
 636       ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
 637       ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
 638       ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
 639       ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
 640    }
 641
 642    /* We want the GLSL compiler to emit code that uses condition codes */
 643    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 644       ctx->Const.ShaderCompilerOptions[i] =
 645          brw->intelScreen->compiler->glsl_compiler_options[i];
 646    }
 647
 648    if (brw->gen >= 7) {
 649       ctx->Const.MaxViewportWidth = 32768;
 650       ctx->Const.MaxViewportHeight = 32768;
 651    }
 652
 653    /* ARB_viewport_array */
 654    if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
 655       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
 656       ctx->Const.ViewportSubpixelBits = 0;
 657
 658       /* Cast to float before negating because MaxViewportWidth is unsigned.
 659        */
 660       ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
 661       ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
 662    }
 663
 664    /* ARB_gpu_shader5 */
 665    if (brw->gen >= 7)
 666       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
 667
 668    /* ARB_framebuffer_no_attachments */
 669    ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
 670    ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
 671    ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
 672    ctx->Const.MaxFramebufferSamples = max_samples;
 673 }
 674
 675 static void
 676 brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
 677 {
 678    struct gl_context *ctx = &brw->ctx;
 679
 680    /* For ES, we set these constants based on SIMD8.
 681     *
 682     * TODO: Once we can always generate SIMD16, we should update this.
 683     *
 684     * For GL, we assume we can generate a SIMD16 program, but this currently
 685     * is not always true. This allows us to run more test cases, and will be
 686     * required based on desktop GL compute shader requirements.
 687     */
 688    const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
 689
 690    const uint32_t max_invocations = simd_size * max_threads;
 691    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
 692    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
 693    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
 694    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
 695    ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
 696 }
 697
 698 /**
 699  * Process driconf (drirc) options, setting appropriate context flags.
 700  *
 701  * intelInitExtensions still pokes at optionCache directly, in order to
 702  * avoid advertising various extensions.  No flags are set, so it makes
 703  * sense to continue doing that there.
 704  */
 705 static void
 706 brw_process_driconf_options(struct brw_context *brw)
 707 {
 708    struct gl_context *ctx = &brw->ctx;
 709
 710    driOptionCache *options = &brw->optionCache;
 711    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 712                        brw->driContext->driScreenPriv->myNum, "i965");
 713
 714    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 715    switch (bo_reuse_mode) {
 716    case DRI_CONF_BO_REUSE_DISABLED:
 717       break;
 718    case DRI_CONF_BO_REUSE_ALL:
 719       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 720       break;
 721    }
 722
 723    if (!driQueryOptionb(options, "hiz")) {
 724        brw->has_hiz = false;
 725        /* On gen6, you can only do separate stencil with HIZ. */
 726        if (brw->gen == 6)
 727           brw->has_separate_stencil = false;
 728    }
 729
 730    if (driQueryOptionb(options, "always_flush_batch")) {
 731       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 732       brw->always_flush_batch = true;
 733    }
 734
 735    if (driQueryOptionb(options, "always_flush_cache")) {
 736       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 737       brw->always_flush_cache = true;
 738    }
 739
 740    if (driQueryOptionb(options, "disable_throttling")) {
 741       fprintf(stderr, "disabling flush throttling\n");
 742       brw->disable_throttling = true;
 743    }
 744
 745    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 746
 747    ctx->Const.ForceGLSLExtensionsWarn =
 748       driQueryOptionb(options, "force_glsl_extensions_warn");
 749
 750    ctx->Const.DisableGLSLLineContinuations =
 751       driQueryOptionb(options, "disable_glsl_line_continuations");
 752
 753    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 754       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 755
 756    brw->dual_color_blend_by_location =
 757       driQueryOptionb(options, "dual_color_blend_by_location");
 758 }
 759
 760 GLboolean
 761 brwCreateContext(gl_api api,
 762                  const struct gl_config *mesaVis,
 763                  __DRIcontext *driContextPriv,
 764                  unsigned major_version,
 765                  unsigned minor_version,
 766                  uint32_t flags,
 767                  bool notify_reset,
 768                  unsigned *dri_ctx_error,
 769                  void *sharedContextPrivate)
 770 {
 771    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 772    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 773    struct intel_screen *screen = sPriv->driverPrivate;
 774    const struct brw_device_info *devinfo = screen->devinfo;
 775    struct dd_function_table functions;
 776
 777    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 778     * provides us with context reset notifications.
 779     */
 780    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 781       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 782
 783    if (screen->has_context_reset_notification)
 784       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 785
 786    if (flags & ~allowed_flags) {
 787       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 788       return false;
 789    }
 790
 791    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 792    if (!brw) {
 793       fprintf(stderr, "%s: failed to alloc context\n", __func__);
 794       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 795       return false;
 796    }
 797
 798    driContextPriv->driverPrivate = brw;
 799    brw->driContext = driContextPriv;
 800    brw->intelScreen = screen;
 801    brw->bufmgr = screen->bufmgr;
 802
 803    brw->gen = devinfo->gen;
 804    brw->gt = devinfo->gt;
 805    brw->is_g4x = devinfo->is_g4x;
 806    brw->is_baytrail = devinfo->is_baytrail;
 807    brw->is_haswell = devinfo->is_haswell;
 808    brw->is_cherryview = devinfo->is_cherryview;
 809    brw->is_broxton = devinfo->is_broxton;
 810    brw->has_llc = devinfo->has_llc;
 811    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 812    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 813    brw->has_pln = devinfo->has_pln;
 814    brw->has_compr4 = devinfo->has_compr4;
 815    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 816    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 817    brw->needs_unlit_centroid_workaround =
 818       devinfo->needs_unlit_centroid_workaround;
 819
 820    brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
 821    brw->has_swizzling = screen->hw_has_swizzling;
 822
 823    brw->vs.base.stage = MESA_SHADER_VERTEX;
 824    brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
 825    brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
 826    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 827    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 828    if (brw->gen >= 8) {
 829       gen8_init_vtable_surface_functions(brw);
 830       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 831    } else if (brw->gen >= 7) {
 832       gen7_init_vtable_surface_functions(brw);
 833       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 834    } else if (brw->gen >= 6) {
 835       gen6_init_vtable_surface_functions(brw);
 836       brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
 837    } else {
 838       gen4_init_vtable_surface_functions(brw);
 839       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 840    }
 841
 842    brw_init_driver_functions(brw, &functions);
 843
 844    if (notify_reset)
 845       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 846
 847    struct gl_context *ctx = &brw->ctx;
 848
 849    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 850       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 851       fprintf(stderr, "%s: failed to init mesa context\n", __func__);
 852       intelDestroyContext(driContextPriv);
 853       return false;
 854    }
 855
 856    driContextSetFlags(ctx, flags);
 857
 858    /* Initialize the software rasterizer and helper modules.
 859     *
 860     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 861     * software fallbacks (which we have to support on legacy GL to do weird
 862     * glDrawPixels(), glBitmap(), and other functions).
 863     */
 864    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 865       _swrast_CreateContext(ctx);
 866    }
 867
 868    _vbo_CreateContext(ctx);
 869    if (ctx->swrast_context) {
 870       _tnl_CreateContext(ctx);
 871       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 872       _swsetup_CreateContext(ctx);
 873
 874       /* Configure swrast to match hardware characteristics: */
 875       _swrast_allow_pixel_fog(ctx, false);
 876       _swrast_allow_vertex_fog(ctx, true);
 877    }
 878
 879    _mesa_meta_init(ctx);
 880
 881    brw_process_driconf_options(brw);
 882
 883    if (INTEL_DEBUG & DEBUG_PERF)
 884       brw->perf_debug = true;
 885
 886    brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
 887    brw_initialize_context_constants(brw);
 888
 889    ctx->Const.ResetStrategy = notify_reset
 890       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 891
 892    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 893    _mesa_init_point(ctx);
 894
 895    intel_fbo_init(brw);
 896
 897    intel_batchbuffer_init(brw);
 898
 899    if (brw->gen >= 6) {
 900       /* Create a new hardware context.  Using a hardware context means that
 901        * our GPU state will be saved/restored on context switch, allowing us
 902        * to assume that the GPU is in the same state we left it in.
 903        *
 904        * This is required for transform feedback buffer offsets, query objects,
 905        * and also allows us to reduce how much state we have to emit.
 906        */
 907       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 908
 909       if (!brw->hw_ctx) {
 910          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 911          intelDestroyContext(driContextPriv);
 912          return false;
 913       }
 914    }
 915
 916    if (brw_init_pipe_control(brw, devinfo)) {
 917       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 918       intelDestroyContext(driContextPriv);
 919       return false;
 920    }
 921
 922    brw_init_state(brw);
 923
 924    intelInitExtensions(ctx);
 925
 926    brw_init_surface_formats(brw);
 927
 928    brw->max_vs_threads = devinfo->max_vs_threads;
 929    brw->max_hs_threads = devinfo->max_hs_threads;
 930    brw->max_ds_threads = devinfo->max_ds_threads;
 931    brw->max_gs_threads = devinfo->max_gs_threads;
 932    brw->max_wm_threads = devinfo->max_wm_threads;
 933    brw->max_cs_threads = devinfo->max_cs_threads;
 934    brw->urb.size = devinfo->urb.size;
 935    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 936    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 937    brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
 938    brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
 939    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 940
 941    /* Estimate the size of the mappable aperture into the GTT.  There's an
 942     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 943     * It turns out it's basically always 256MB, though some ancient hardware
 944     * was smaller.
 945     */
 946    uint32_t gtt_size = 256 * 1024 * 1024;
 947
 948    /* We don't want to map two objects such that a memcpy between them would
 949     * just fault one mapping in and then the other over and over forever.  So
 950     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 951     * taken up by things like the framebuffer and the ringbuffer and such, so
 952     * be more conservative.
 953     */
 954    brw->max_gtt_map_object_size = gtt_size / 4;
 955
 956    if (brw->gen == 6)
 957       brw->urb.gs_present = false;
 958
 959    brw->prim_restart.in_progress = false;
 960    brw->prim_restart.enable_cut_index = false;
 961    brw->gs.enabled = false;
 962    brw->sf.viewport_transform_enable = true;
 963
 964    brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
 965
 966    brw->use_resource_streamer = screen->has_resource_streamer &&
 967       (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
 968        env_var_as_boolean("INTEL_USE_GATHER", false));
 969
 970    ctx->VertexProgram._MaintainTnlProgram = true;
 971    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 972
 973    brw_draw_init( brw );
 974
 975    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 976       /* Turn on some extra GL_ARB_debug_output generation. */
 977       brw->perf_debug = true;
 978    }
 979
 980    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 981       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 982
 983    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 984       brw_init_shader_time(brw);
 985
 986    _mesa_compute_version(ctx);
 987
 988    _mesa_initialize_dispatch_tables(ctx);
 989    _mesa_initialize_vbo_vtxfmt(ctx);
 990
 991    if (ctx->Extensions.AMD_performance_monitor) {
 992       brw_init_performance_monitors(brw);
 993    }
 994
 995    vbo_use_buffer_objects(ctx);
 996    vbo_always_unmap_buffers(ctx);
 997
 998    return true;
 999 }
1000
1001 void
1002 intelDestroyContext(__DRIcontext * driContextPriv)
1003 {
1004    struct brw_context *brw =
1005       (struct brw_context *) driContextPriv->driverPrivate;
1006    struct gl_context *ctx = &brw->ctx;
1007
1008    /* Dump a final BMP in case the application doesn't call SwapBuffers */
1009    if (INTEL_DEBUG & DEBUG_AUB) {
1010       intel_batchbuffer_flush(brw);
1011       aub_dump_bmp(&brw->ctx);
1012    }
1013
1014    _mesa_meta_free(&brw->ctx);
1015    brw_meta_fast_clear_free(brw);
1016
1017    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1018       /* Force a report. */
1019       brw->shader_time.report_time = 0;
1020
1021       brw_collect_and_report_shader_time(brw);
1022       brw_destroy_shader_time(brw);
1023    }
1024
1025    brw_destroy_state(brw);
1026    brw_draw_destroy(brw);
1027
1028    drm_intel_bo_unreference(brw->curbe.curbe_bo);
1029    if (brw->vs.base.scratch_bo)
1030       drm_intel_bo_unreference(brw->vs.base.scratch_bo);
1031    if (brw->gs.base.scratch_bo)
1032       drm_intel_bo_unreference(brw->gs.base.scratch_bo);
1033    if (brw->wm.base.scratch_bo)
1034       drm_intel_bo_unreference(brw->wm.base.scratch_bo);
1035
1036    gen7_reset_hw_bt_pool_offsets(brw);
1037    drm_intel_bo_unreference(brw->hw_bt_pool.bo);
1038    brw->hw_bt_pool.bo = NULL;
1039
1040    drm_intel_gem_context_destroy(brw->hw_ctx);
1041
1042    if (ctx->swrast_context) {
1043       _swsetup_DestroyContext(&brw->ctx);
1044       _tnl_DestroyContext(&brw->ctx);
1045    }
1046    _vbo_DestroyContext(&brw->ctx);
1047
1048    if (ctx->swrast_context)
1049       _swrast_DestroyContext(&brw->ctx);
1050
1051    brw_fini_pipe_control(brw);
1052    intel_batchbuffer_free(brw);
1053
1054    drm_intel_bo_unreference(brw->throttle_batch[1]);
1055    drm_intel_bo_unreference(brw->throttle_batch[0]);
1056    brw->throttle_batch[1] = NULL;
1057    brw->throttle_batch[0] = NULL;
1058
1059    driDestroyOptionCache(&brw->optionCache);
1060
1061    /* free the Mesa context */
1062    _mesa_free_context_data(&brw->ctx);
1063
1064    ralloc_free(brw);
1065    driContextPriv->driverPrivate = NULL;
1066 }
1067
1068 GLboolean
1069 intelUnbindContext(__DRIcontext * driContextPriv)
1070 {
1071    /* Unset current context and dispath table */
1072    _mesa_make_current(NULL, NULL, NULL);
1073
1074    return true;
1075 }
1076
1077 /**
1078  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
1079  * on window system framebuffers.
1080  *
1081  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1082  * your renderbuffer can do sRGB encode, and you can flip a switch that does
1083  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
1084  * for a visual where you're guaranteed to be capable, but it turns out that
1085  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1086  * incapable ones, because there's no difference between the two in resources
1087  * used.  Applications thus get built that accidentally rely on the default
1088  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
1089  * great...
1090  *
1091  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1092  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1093  * So they removed the enable knob and made it "if the renderbuffer is sRGB
1094  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
1095  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1096  * and get no sRGB encode (assuming that both kinds of visual are available).
1097  * Thus our choice to support sRGB by default on our visuals for desktop would
1098  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1099  *
1100  * Unfortunately, renderbuffer setup happens before a context is created.  So
1101  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1102  * context (without an sRGB visual, though we don't have sRGB visuals exposed
1103  * yet), we go turn that back off before anyone finds out.
1104  */
1105 static void
1106 intel_gles3_srgb_workaround(struct brw_context *brw,
1107                             struct gl_framebuffer *fb)
1108 {
1109    struct gl_context *ctx = &brw->ctx;
1110
1111    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1112       return;
1113
1114    /* Some day when we support the sRGB capable bit on visuals available for
1115     * GLES, we'll need to respect that and not disable things here.
1116     */
1117    fb->Visual.sRGBCapable = false;
1118    for (int i = 0; i < BUFFER_COUNT; i++) {
1119       if (fb->Attachment[i].Renderbuffer &&
1120           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
1121          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
1122       }
1123    }
1124 }
1125
1126 GLboolean
1127 intelMakeCurrent(__DRIcontext * driContextPriv,
1128                  __DRIdrawable * driDrawPriv,
1129                  __DRIdrawable * driReadPriv)
1130 {
1131    struct brw_context *brw;
1132    GET_CURRENT_CONTEXT(curCtx);
1133
1134    if (driContextPriv)
1135       brw = (struct brw_context *) driContextPriv->driverPrivate;
1136    else
1137       brw = NULL;
1138
1139    /* According to the glXMakeCurrent() man page: "Pending commands to
1140     * the previous context, if any, are flushed before it is released."
1141     * But only flush if we're actually changing contexts.
1142     */
1143    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
1144       _mesa_flush(curCtx);
1145    }
1146
1147    if (driContextPriv) {
1148       struct gl_context *ctx = &brw->ctx;
1149       struct gl_framebuffer *fb, *readFb;
1150
1151       if (driDrawPriv == NULL) {
1152          fb = _mesa_get_incomplete_framebuffer();
1153       } else {
1154          fb = driDrawPriv->driverPrivate;
1155          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1156       }
1157
1158       if (driReadPriv == NULL) {
1159          readFb = _mesa_get_incomplete_framebuffer();
1160       } else {
1161          readFb = driReadPriv->driverPrivate;
1162          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1163       }
1164
1165       /* The sRGB workaround changes the renderbuffer's format. We must change
1166        * the format before the renderbuffer's miptree get's allocated, otherwise
1167        * the formats of the renderbuffer and its miptree will differ.
1168        */
1169       intel_gles3_srgb_workaround(brw, fb);
1170       intel_gles3_srgb_workaround(brw, readFb);
1171
1172       /* If the context viewport hasn't been initialized, force a call out to
1173        * the loader to get buffers so we have a drawable size for the initial
1174        * viewport. */
1175       if (!brw->ctx.ViewportInitialized)
1176          intel_prepare_render(brw);
1177
1178       _mesa_make_current(ctx, fb, readFb);
1179    } else {
1180       _mesa_make_current(NULL, NULL, NULL);
1181    }
1182
1183    return true;
1184 }
1185
1186 void
1187 intel_resolve_for_dri2_flush(struct brw_context *brw,
1188                              __DRIdrawable *drawable)
1189 {
1190    if (brw->gen < 6) {
1191       /* MSAA and fast color clear are not supported, so don't waste time
1192        * checking whether a resolve is needed.
1193        */
1194       return;
1195    }
1196
1197    struct gl_framebuffer *fb = drawable->driverPrivate;
1198    struct intel_renderbuffer *rb;
1199
1200    /* Usually, only the back buffer will need to be downsampled. However,
1201     * the front buffer will also need it if the user has rendered into it.
1202     */
1203    static const gl_buffer_index buffers[2] = {
1204          BUFFER_BACK_LEFT,
1205          BUFFER_FRONT_LEFT,
1206    };
1207
1208    for (int i = 0; i < 2; ++i) {
1209       rb = intel_get_renderbuffer(fb, buffers[i]);
1210       if (rb == NULL || rb->mt == NULL)
1211          continue;
1212       if (rb->mt->num_samples <= 1)
1213          intel_miptree_resolve_color(brw, rb->mt, 0);
1214       else
1215          intel_renderbuffer_downsample(brw, rb);
1216    }
1217 }
1218
/** Size of one pixel of \p rb in bits, derived from its format's byte size. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   enum { BITS_PER_BYTE = 8 };

   return _mesa_get_format_bytes(intel_rb_format(rb)) * BITS_PER_BYTE;
}
1224
1225 static void
1226 intel_query_dri2_buffers(struct brw_context *brw,
1227                          __DRIdrawable *drawable,
1228                          __DRIbuffer **buffers,
1229                          int *count);
1230
1231 static void
1232 intel_process_dri2_buffer(struct brw_context *brw,
1233                           __DRIdrawable *drawable,
1234                           __DRIbuffer *buffer,
1235                           struct intel_renderbuffer *rb,
1236                           const char *buffer_name);
1237
1238 static void
1239 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1240
1241 static void
1242 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1243 {
1244    struct gl_framebuffer *fb = drawable->driverPrivate;
1245    struct intel_renderbuffer *rb;
1246    __DRIbuffer *buffers = NULL;
1247    int i, count;
1248    const char *region_name;
1249
1250    /* Set this up front, so that in case our buffers get invalidated
1251     * while we're getting new buffers, we don't clobber the stamp and
1252     * thus ignore the invalidate. */
1253    drawable->lastStamp = drawable->dri2.stamp;
1254
1255    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1256       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1257
1258    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1259
1260    if (buffers == NULL)
1261       return;
1262
1263    for (i = 0; i < count; i++) {
1264        switch (buffers[i].attachment) {
1265        case __DRI_BUFFER_FRONT_LEFT:
1266            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1267            region_name = "dri2 front buffer";
1268            break;
1269
1270        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1271            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1272            region_name = "dri2 fake front buffer";
1273            break;
1274
1275        case __DRI_BUFFER_BACK_LEFT:
1276            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1277            region_name = "dri2 back buffer";
1278            break;
1279
1280        case __DRI_BUFFER_DEPTH:
1281        case __DRI_BUFFER_HIZ:
1282        case __DRI_BUFFER_DEPTH_STENCIL:
1283        case __DRI_BUFFER_STENCIL:
1284        case __DRI_BUFFER_ACCUM:
1285        default:
1286            fprintf(stderr,
1287                    "unhandled buffer attach event, attachment type %d\n",
1288                    buffers[i].attachment);
1289            return;
1290        }
1291
1292        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1293    }
1294
1295 }
1296
1297 void
1298 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1299 {
1300    struct brw_context *brw = context->driverPrivate;
1301    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1302
1303    /* Set this up front, so that in case our buffers get invalidated
1304     * while we're getting new buffers, we don't clobber the stamp and
1305     * thus ignore the invalidate. */
1306    drawable->lastStamp = drawable->dri2.stamp;
1307
1308    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1309       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1310
1311    if (screen->image.loader)
1312       intel_update_image_buffers(brw, drawable);
1313    else
1314       intel_update_dri2_buffers(brw, drawable);
1315
1316    driUpdateFramebufferSize(&brw->ctx, drawable);
1317 }
1318
1319 /**
1320  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1321  * state is required.
1322  */
1323 void
1324 intel_prepare_render(struct brw_context *brw)
1325 {
1326    struct gl_context *ctx = &brw->ctx;
1327    __DRIcontext *driContext = brw->driContext;
1328    __DRIdrawable *drawable;
1329
1330    drawable = driContext->driDrawablePriv;
1331    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1332       if (drawable->lastStamp != drawable->dri2.stamp)
1333          intel_update_renderbuffers(driContext, drawable);
1334       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1335    }
1336
1337    drawable = driContext->driReadablePriv;
1338    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1339       if (drawable->lastStamp != drawable->dri2.stamp)
1340          intel_update_renderbuffers(driContext, drawable);
1341       driContext->dri2.read_stamp = drawable->dri2.stamp;
1342    }
1343
1344    /* If we're currently rendering to the front buffer, the rendering
1345     * that will happen next will probably dirty the front buffer.  So
1346     * mark it as dirty here.
1347     */
1348    if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1349       brw->front_buffer_dirty = true;
1350 }
1351
1352 /**
1353  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1354  *
1355  * To determine which DRI buffers to request, examine the renderbuffers
1356  * attached to the drawable's framebuffer. Then request the buffers with
1357  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1358  *
1359  * This is called from intel_update_renderbuffers().
1360  *
1361  * \param drawable      Drawable whose buffers are queried.
1362  * \param buffers       [out] List of buffers returned by DRI2 query.
1363  * \param buffer_count  [out] Number of buffers returned.
1364  *
1365  * \see intel_update_renderbuffers()
1366  * \see DRI2GetBuffers()
1367  * \see DRI2GetBuffersWithFormat()
1368  */
1369 static void
1370 intel_query_dri2_buffers(struct brw_context *brw,
1371                          __DRIdrawable *drawable,
1372                          __DRIbuffer **buffers,
1373                          int *buffer_count)
1374 {
1375    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1376    struct gl_framebuffer *fb = drawable->driverPrivate;
1377    int i = 0;
1378    unsigned attachments[8];
1379
1380    struct intel_renderbuffer *front_rb;
1381    struct intel_renderbuffer *back_rb;
1382
1383    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1384    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1385
1386    memset(attachments, 0, sizeof(attachments));
1387    if ((_mesa_is_front_buffer_drawing(fb) ||
1388         _mesa_is_front_buffer_reading(fb) ||
1389         !back_rb) && front_rb) {
1390       /* If a fake front buffer is in use, then querying for
1391        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1392        * the real front buffer to the fake front buffer.  So before doing the
1393        * query, we need to make sure all the pending drawing has landed in the
1394        * real front buffer.
1395        */
1396       intel_batchbuffer_flush(brw);
1397       intel_flush_front(&brw->ctx);
1398
1399       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1400       attachments[i++] = intel_bits_per_pixel(front_rb);
1401    } else if (front_rb && brw->front_buffer_dirty) {
1402       /* We have pending front buffer rendering, but we aren't querying for a
1403        * front buffer.  If the front buffer we have is a fake front buffer,
1404        * the X server is going to throw it away when it processes the query.
1405        * So before doing the query, make sure all the pending drawing has
1406        * landed in the real front buffer.
1407        */
1408       intel_batchbuffer_flush(brw);
1409       intel_flush_front(&brw->ctx);
1410    }
1411
1412    if (back_rb) {
1413       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1414       attachments[i++] = intel_bits_per_pixel(back_rb);
1415    }
1416
1417    assert(i <= ARRAY_SIZE(attachments));
1418
1419    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1420                                                         &drawable->w,
1421                                                         &drawable->h,
1422                                                         attachments, i / 2,
1423                                                         buffer_count,
1424                                                         drawable->loaderPrivate);
1425 }
1426
1427 /**
1428  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1429  *
1430  * This is called from intel_update_renderbuffers().
1431  *
1432  * \par Note:
1433  *    DRI buffers whose attachment point is DRI2BufferStencil or
1434  *    DRI2BufferDepthStencil are handled as special cases.
1435  *
1436  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1437  *        that is passed to drm_intel_bo_gem_create_from_name().
1438  *
1439  * \see intel_update_renderbuffers()
1440  */
1441 static void
1442 intel_process_dri2_buffer(struct brw_context *brw,
1443                           __DRIdrawable *drawable,
1444                           __DRIbuffer *buffer,
1445                           struct intel_renderbuffer *rb,
1446                           const char *buffer_name)
1447 {
1448    struct gl_framebuffer *fb = drawable->driverPrivate;
1449    drm_intel_bo *bo;
1450
1451    if (!rb)
1452       return;
1453
1454    unsigned num_samples = rb->Base.Base.NumSamples;
1455
1456    /* We try to avoid closing and reopening the same BO name, because the first
1457     * use of a mapping of the buffer involves a bunch of page faulting which is
1458     * moderately expensive.
1459     */
1460    struct intel_mipmap_tree *last_mt;
1461    if (num_samples == 0)
1462       last_mt = rb->mt;
1463    else
1464       last_mt = rb->singlesample_mt;
1465
1466    uint32_t old_name = 0;
1467    if (last_mt) {
1468        /* The bo already has a name because the miptree was created by a
1469         * previous call to intel_process_dri2_buffer(). If a bo already has a
1470         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1471         * create a new name.
1472         */
1473       drm_intel_bo_flink(last_mt->bo, &old_name);
1474    }
1475
1476    if (old_name == buffer->name)
1477       return;
1478
1479    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1480       fprintf(stderr,
1481               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1482               buffer->name, buffer->attachment,
1483               buffer->cpp, buffer->pitch);
1484    }
1485
1486    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1487                                           buffer->name);
1488    if (!bo) {
1489       fprintf(stderr,
1490               "Failed to open BO for returned DRI2 buffer "
1491               "(%dx%d, %s, named %d).\n"
1492               "This is likely a bug in the X Server that will lead to a "
1493               "crash soon.\n",
1494               drawable->w, drawable->h, buffer_name, buffer->name);
1495       return;
1496    }
1497
1498    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1499                                             drawable->w, drawable->h,
1500                                             buffer->pitch);
1501
1502    if (_mesa_is_front_buffer_drawing(fb) &&
1503        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1504         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1505        rb->Base.Base.NumSamples > 1) {
1506       intel_renderbuffer_upsample(brw, rb);
1507    }
1508
1509    assert(rb->mt);
1510
1511    drm_intel_bo_unreference(bo);
1512 }
1513
1514 /**
1515  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1516  *
1517  * To determine which DRI buffers to request, examine the renderbuffers
1518  * attached to the drawable's framebuffer. Then request the buffers from
1519  * the image loader
1520  *
1521  * This is called from intel_update_renderbuffers().
1522  *
1523  * \param drawable      Drawable whose buffers are queried.
1524  * \param buffers       [out] List of buffers returned by DRI2 query.
1525  * \param buffer_count  [out] Number of buffers returned.
1526  *
1527  * \see intel_update_renderbuffers()
1528  */
1529
1530 static void
1531 intel_update_image_buffer(struct brw_context *intel,
1532                           __DRIdrawable *drawable,
1533                           struct intel_renderbuffer *rb,
1534                           __DRIimage *buffer,
1535                           enum __DRIimageBufferMask buffer_type)
1536 {
1537    struct gl_framebuffer *fb = drawable->driverPrivate;
1538
1539    if (!rb || !buffer->bo)
1540       return;
1541
1542    unsigned num_samples = rb->Base.Base.NumSamples;
1543
1544    /* Check and see if we're already bound to the right
1545     * buffer object
1546     */
1547    struct intel_mipmap_tree *last_mt;
1548    if (num_samples == 0)
1549       last_mt = rb->mt;
1550    else
1551       last_mt = rb->singlesample_mt;
1552
1553    if (last_mt && last_mt->bo == buffer->bo)
1554       return;
1555
1556    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1557                                             buffer->width, buffer->height,
1558                                             buffer->pitch);
1559
1560    if (_mesa_is_front_buffer_drawing(fb) &&
1561        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1562        rb->Base.Base.NumSamples > 1) {
1563       intel_renderbuffer_upsample(intel, rb);
1564    }
1565 }
1566
1567 static void
1568 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1569 {
1570    struct gl_framebuffer *fb = drawable->driverPrivate;
1571    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1572    struct intel_renderbuffer *front_rb;
1573    struct intel_renderbuffer *back_rb;
1574    struct __DRIimageList images;
1575    unsigned int format;
1576    uint32_t buffer_mask = 0;
1577
1578    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1579    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1580
1581    if (back_rb)
1582       format = intel_rb_format(back_rb);
1583    else if (front_rb)
1584       format = intel_rb_format(front_rb);
1585    else
1586       return;
1587
1588    if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1589                     _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1590       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1591    }
1592
1593    if (back_rb)
1594       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1595
1596    (*screen->image.loader->getBuffers) (drawable,
1597                                         driGLFormatToImageFormat(format),
1598                                         &drawable->dri2.stamp,
1599                                         drawable->loaderPrivate,
1600                                         buffer_mask,
1601                                         &images);
1602
1603    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1604       drawable->w = images.front->width;
1605       drawable->h = images.front->height;
1606       intel_update_image_buffer(brw,
1607                                 drawable,
1608                                 front_rb,
1609                                 images.front,
1610                                 __DRI_IMAGE_BUFFER_FRONT);
1611    }
1612    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1613       drawable->w = images.back->width;
1614       drawable->h = images.back->height;
1615       intel_update_image_buffer(brw,
1616                                 drawable,
1617                                 back_rb,
1618                                 images.back,
1619                                 __DRI_IMAGE_BUFFER_BACK);
1620    }
1621 }