src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43 #include "main/texobj.h"
  44 #include "main/framebuffer.h"
  45
  46 #include "vbo/vbo_context.h"
  47
  48 #include "drivers/common/driverfuncs.h"
  49 #include "drivers/common/meta.h"
  50 #include "utils.h"
  51
  52 #include "brw_context.h"
  53 #include "brw_defines.h"
  54 #include "brw_compiler.h"
  55 #include "brw_draw.h"
  56 #include "brw_state.h"
  57
  58 #include "intel_batchbuffer.h"
  59 #include "intel_buffer_objects.h"
  60 #include "intel_buffers.h"
  61 #include "intel_fbo.h"
  62 #include "intel_mipmap_tree.h"
  63 #include "intel_pixel.h"
  64 #include "intel_image.h"
  65 #include "intel_tex.h"
  66 #include "intel_tex_obj.h"
  67
  68 #include "swrast_setup/swrast_setup.h"
  69 #include "tnl/tnl.h"
  70 #include "tnl/t_pipeline.h"
  71 #include "util/ralloc.h"
  72 #include "util/debug.h"
  73
  74 /***************************************
  75  * Mesa's Driver Functions
  76  ***************************************/
  77
  78 const char *const brw_vendor_string = "Intel Open Source Technology Center";
  79
  80 const char *
  81 brw_get_renderer_string(unsigned deviceID)
  82 {
  83    const char *chipset;
  84    static char buffer[128];
  85
  86    switch (deviceID) {
  87 #undef CHIPSET
  88 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
  89 #include "pci_ids/i965_pci_ids.h"
  90    default:
  91       chipset = "Unknown Intel Chipset";
  92       break;
  93    }
  94
  95    (void) driGetRendererString(buffer, chipset, 0);
  96    return buffer;
  97 }
  98
  99 static const GLubyte *
 100 intel_get_string(struct gl_context * ctx, GLenum name)
 101 {
 102    const struct brw_context *const brw = brw_context(ctx);
 103
 104    switch (name) {
 105    case GL_VENDOR:
 106       return (GLubyte *) brw_vendor_string;
 107
 108    case GL_RENDERER:
 109       return
 110          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 111
 112    default:
 113       return NULL;
 114    }
 115 }
 116
 117 static void
 118 intel_viewport(struct gl_context *ctx)
 119 {
 120    struct brw_context *brw = brw_context(ctx);
 121    __DRIcontext *driContext = brw->driContext;
 122
 123    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 124       if (driContext->driDrawablePriv)
 125          dri2InvalidateDrawable(driContext->driDrawablePriv);
 126       if (driContext->driReadablePriv)
 127          dri2InvalidateDrawable(driContext->driReadablePriv);
 128    }
 129 }
 130
 131 static void
 132 intel_update_framebuffer(struct gl_context *ctx,
 133                          struct gl_framebuffer *fb)
 134 {
 135    struct brw_context *brw = brw_context(ctx);
 136
 137    /* Quantize the derived default number of samples
 138     */
 139    fb->DefaultGeometry._NumSamples =
 140       intel_quantize_num_samples(brw->intelScreen,
 141                                  fb->DefaultGeometry.NumSamples);
 142 }
 143
 144 static void
 145 intel_update_state(struct gl_context * ctx, GLuint new_state)
 146 {
 147    struct brw_context *brw = brw_context(ctx);
 148    struct intel_texture_object *tex_obj;
 149    struct intel_renderbuffer *depth_irb;
 150
 151    if (ctx->swrast_context)
 152       _swrast_InvalidateState(ctx, new_state);
 153    _vbo_InvalidateState(ctx, new_state);
 154
 155    brw->NewGLState |= new_state;
 156
 157    _mesa_unlock_context_textures(ctx);
 158
 159    /* Resolve the depth buffer's HiZ buffer. */
 160    depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
 161    if (depth_irb)
 162       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 163
 164    /* Resolve depth buffer and render cache of each enabled texture. */
 165    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
 166    for (int i = 0; i <= maxEnabledUnit; i++) {
 167       if (!ctx->Texture.Unit[i]._Current)
 168          continue;
 169       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
 170       if (!tex_obj || !tex_obj->mt)
 171          continue;
 172       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
 173       /* Sampling engine understands lossless compression and resolving
 174        * those surfaces should be skipped for performance reasons.
 175        */
 176       intel_miptree_resolve_color(brw, tex_obj->mt,
 177                                   INTEL_MIPTREE_IGNORE_CCS_E);
 178       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 179    }
 180
 181    /* Resolve color for each active shader image. */
 182    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 183       const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
 184          ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;
 185
 186       if (unlikely(shader && shader->NumImages)) {
 187          for (unsigned j = 0; j < shader->NumImages; j++) {
 188             struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
 189             tex_obj = intel_texture_object(u->TexObj);
 190
 191             if (tex_obj && tex_obj->mt) {
 192                /* Access to images is implemented using indirect messages
 193                 * against data port. Normal render target write understands
 194                 * lossless compression but unfortunately the typed/untyped
 195                 * read/write interface doesn't. Therefore the compressed
 196                 * surfaces need to be resolved prior to accessing them.
 197                 */
 198                intel_miptree_resolve_color(brw, tex_obj->mt, 0);
 199                brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 200             }
 201          }
 202       }
 203    }
 204
 205    /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
 206     * single-sampled color renderbuffers because the CCS buffer isn't
 207     * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
 208     * enabled because otherwise the surface state will be programmed with the
 209     * linear equivalent format anyway.
 210     */
 211    if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
 212       struct gl_framebuffer *fb = ctx->DrawBuffer;
 213       for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
 214          struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
 215
 216          if (rb == NULL)
 217             continue;
 218
 219          struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 220          struct intel_mipmap_tree *mt = irb->mt;
 221
 222          if (mt == NULL ||
 223              mt->num_samples > 1 ||
 224              _mesa_get_srgb_format_linear(mt->format) == mt->format)
 225                continue;
 226
 227          /* Lossless compression is not supported for SRGB formats, it
 228           * should be impossible to get here with such surfaces.
 229           */
 230          assert(!intel_miptree_is_lossless_compressed(brw, mt));
 231          intel_miptree_resolve_color(brw, mt, 0);
 232          brw_render_cache_set_check_flush(brw, mt->bo);
 233       }
 234    }
 235
 236    _mesa_lock_context_textures(ctx);
 237
 238    if (new_state & _NEW_BUFFERS) {
 239       intel_update_framebuffer(ctx, ctx->DrawBuffer);
 240       if (ctx->DrawBuffer != ctx->ReadBuffer)
 241          intel_update_framebuffer(ctx, ctx->ReadBuffer);
 242    }
 243 }
 244
 245 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 246
 247 static void
 248 intel_flush_front(struct gl_context *ctx)
 249 {
 250    struct brw_context *brw = brw_context(ctx);
 251    __DRIcontext *driContext = brw->driContext;
 252    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 253    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 254
 255    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 256       if (flushFront(screen) && driDrawable &&
 257           driDrawable->loaderPrivate) {
 258
 259          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 260           *
 261           * This potentially resolves both front and back buffer. It
 262           * is unnecessary to resolve the back, but harms nothing except
 263           * performance. And no one cares about front-buffer render
 264           * performance.
 265           */
 266          intel_resolve_for_dri2_flush(brw, driDrawable);
 267          intel_batchbuffer_flush(brw);
 268
 269          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 270
 271          /* We set the dirty bit in intel_prepare_render() if we're
 272           * front buffer rendering once we get there.
 273           */
 274          brw->front_buffer_dirty = false;
 275       }
 276    }
 277 }
 278
 279 static void
 280 intel_glFlush(struct gl_context *ctx)
 281 {
 282    struct brw_context *brw = brw_context(ctx);
 283
 284    intel_batchbuffer_flush(brw);
 285    intel_flush_front(ctx);
 286
 287    brw->need_flush_throttle = true;
 288 }
 289
 290 static void
 291 intel_finish(struct gl_context * ctx)
 292 {
 293    struct brw_context *brw = brw_context(ctx);
 294
 295    intel_glFlush(ctx);
 296
 297    if (brw->batch.last_bo)
 298       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 299 }
 300
 301 static void
 302 brw_init_driver_functions(struct brw_context *brw,
 303                           struct dd_function_table *functions)
 304 {
 305    _mesa_init_driver_functions(functions);
 306
 307    /* GLX uses DRI2 invalidate events to handle window resizing.
 308     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 309     * which doesn't provide a mechanism for snooping the event queues.
 310     *
 311     * So EGL still relies on viewport hacks to handle window resizing.
 312     * This should go away with DRI3000.
 313     */
 314    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 315       functions->Viewport = intel_viewport;
 316
 317    functions->Flush = intel_glFlush;
 318    functions->Finish = intel_finish;
 319    functions->GetString = intel_get_string;
 320    functions->UpdateState = intel_update_state;
 321
 322    intelInitTextureFuncs(functions);
 323    intelInitTextureImageFuncs(functions);
 324    intelInitTextureSubImageFuncs(functions);
 325    intelInitTextureCopyImageFuncs(functions);
 326    intelInitCopyImageFuncs(functions);
 327    intelInitClearFuncs(functions);
 328    intelInitBufferFuncs(functions);
 329    intelInitPixelFuncs(functions);
 330    intelInitBufferObjectFuncs(functions);
 331    intel_init_syncobj_functions(functions);
 332    brw_init_object_purgeable_functions(functions);
 333
 334    brwInitFragProgFuncs( functions );
 335    brw_init_common_queryobj_functions(functions);
 336    if (brw->gen >= 6)
 337       gen6_init_queryobj_functions(functions);
 338    else
 339       gen4_init_queryobj_functions(functions);
 340    brw_init_compute_functions(functions);
 341    if (brw->gen >= 7)
 342       brw_init_conditional_render_functions(functions);
 343
 344    functions->QueryInternalFormat = brw_query_internal_format;
 345
 346    functions->NewTransformFeedback = brw_new_transform_feedback;
 347    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 348    functions->GetTransformFeedbackVertexCount =
 349       brw_get_transform_feedback_vertex_count;
 350    if (brw->gen >= 7) {
 351       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 352       functions->EndTransformFeedback = gen7_end_transform_feedback;
 353       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 354       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 355    } else {
 356       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 357       functions->EndTransformFeedback = brw_end_transform_feedback;
 358    }
 359
 360    if (brw->gen >= 6)
 361       functions->GetSamplePosition = gen6_get_sample_position;
 362 }
 363
 364 static void
 365 brw_initialize_context_constants(struct brw_context *brw)
 366 {
 367    struct gl_context *ctx = &brw->ctx;
 368    const struct brw_compiler *compiler = brw->intelScreen->compiler;
 369
 370    const bool stage_exists[MESA_SHADER_STAGES] = {
 371       [MESA_SHADER_VERTEX] = true,
 372       [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
 373       [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
 374       [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
 375       [MESA_SHADER_FRAGMENT] = true,
 376       [MESA_SHADER_COMPUTE] =
 377          (ctx->API == API_OPENGL_CORE &&
 378           ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
 379          (ctx->API == API_OPENGLES2 &&
 380           ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
 381          _mesa_extension_override_enables.ARB_compute_shader,
 382    };
 383
 384    unsigned num_stages = 0;
 385    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 386       if (stage_exists[i])
 387          num_stages++;
 388    }
 389
 390    unsigned max_samplers =
 391       brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
 392
 393    ctx->Const.MaxDualSourceDrawBuffers = 1;
 394    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 395    ctx->Const.MaxCombinedShaderOutputResources =
 396       MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
 397
 398    ctx->Const.QueryCounterBits.Timestamp = 36;
 399
 400    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 401    ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
 402    ctx->Const.MaxRenderbufferSize = 8192;
 403    ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
 404    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
 405    ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
 406    ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
 407    ctx->Const.MaxTextureMbytes = 1536;
 408    ctx->Const.MaxTextureRectSize = 1 << 12;
 409    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 410    ctx->Const.StripTextureBorder = true;
 411    if (brw->gen >= 7)
 412       ctx->Const.MaxProgramTextureGatherComponents = 4;
 413    else if (brw->gen == 6)
 414       ctx->Const.MaxProgramTextureGatherComponents = 1;
 415
 416    ctx->Const.MaxUniformBlockSize = 65536;
 417
 418    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 419       struct gl_program_constants *prog = &ctx->Const.Program[i];
 420
 421       if (!stage_exists[i])
 422          continue;
 423
 424       prog->MaxTextureImageUnits = max_samplers;
 425
 426       prog->MaxUniformBlocks = BRW_MAX_UBO;
 427       prog->MaxCombinedUniformComponents =
 428          prog->MaxUniformComponents +
 429          ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
 430
 431       prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 432       prog->MaxAtomicBuffers = BRW_MAX_ABO;
 433       prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
 434       prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
 435    }
 436
 437    ctx->Const.MaxTextureUnits =
 438       MIN2(ctx->Const.MaxTextureCoordUnits,
 439            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 440
 441    ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
 442    ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
 443    ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
 444    ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
 445    ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
 446    ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
 447    ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
 448
 449
 450    /* Hardware only supports a limited number of transform feedback buffers.
 451     * So we need to override the Mesa default (which is based only on software
 452     * limits).
 453     */
 454    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 455
 456    /* On Gen6, in the worst case, we use up one binding table entry per
 457     * transform feedback component (see comments above the definition of
 458     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
 459     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
 460     * BRW_MAX_SOL_BINDINGS.
 461     *
 462     * In "separate components" mode, we need to divide this value by
 463     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
 464     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
 465     */
 466    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
 467    ctx->Const.MaxTransformFeedbackSeparateComponents =
 468       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 469
 470    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
 471
 472    int max_samples;
 473    const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
 474    const int clamp_max_samples =
 475       driQueryOptioni(&brw->optionCache, "clamp_max_samples");
 476
 477    if (clamp_max_samples < 0) {
 478       max_samples = msaa_modes[0];
 479    } else {
 480       /* Select the largest supported MSAA mode that does not exceed
 481        * clamp_max_samples.
 482        */
 483       max_samples = 0;
 484       for (int i = 0; msaa_modes[i] != 0; ++i) {
 485          if (msaa_modes[i] <= clamp_max_samples) {
 486             max_samples = msaa_modes[i];
 487             break;
 488          }
 489       }
 490    }
 491
 492    ctx->Const.MaxSamples = max_samples;
 493    ctx->Const.MaxColorTextureSamples = max_samples;
 494    ctx->Const.MaxDepthTextureSamples = max_samples;
 495    ctx->Const.MaxIntegerSamples = max_samples;
 496    ctx->Const.MaxImageSamples = 0;
 497
 498    /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
 499     * to map indices of rectangular grid to sample numbers within a pixel.
 500     * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
 501     * extension implementation. For more details see the comment above
 502     * gen6_set_sample_maps() definition.
 503     */
 504    gen6_set_sample_maps(ctx);
 505
 506    ctx->Const.MinLineWidth = 1.0;
 507    ctx->Const.MinLineWidthAA = 1.0;
 508    if (brw->gen >= 6) {
 509       ctx->Const.MaxLineWidth = 7.375;
 510       ctx->Const.MaxLineWidthAA = 7.375;
 511       ctx->Const.LineWidthGranularity = 0.125;
 512    } else {
 513       ctx->Const.MaxLineWidth = 7.0;
 514       ctx->Const.MaxLineWidthAA = 7.0;
 515       ctx->Const.LineWidthGranularity = 0.5;
 516    }
 517
 518    /* For non-antialiased lines, we have to round the line width to the
 519     * nearest whole number. Make sure that we don't advertise a line
 520     * width that, when rounded, will be beyond the actual hardware
 521     * maximum.
 522     */
 523    assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
 524
 525    ctx->Const.MinPointSize = 1.0;
 526    ctx->Const.MinPointSizeAA = 1.0;
 527    ctx->Const.MaxPointSize = 255.0;
 528    ctx->Const.MaxPointSizeAA = 255.0;
 529    ctx->Const.PointSizeGranularity = 1.0;
 530
 531    if (brw->gen >= 5 || brw->is_g4x)
 532       ctx->Const.MaxClipPlanes = 8;
 533
 534    ctx->Const.LowerTessLevel = true;
 535
 536    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
 537    ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
 538    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
 539    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
 540    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
 541    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
 542    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
 543    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
 544    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
 545    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
 546    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
 547    ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
 548       MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
 549            ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
 550
 551    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
 552    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
 553    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
 554    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
 555    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
 556    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
 557    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
 558    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
 559    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
 560       MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
 561            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
 562
 563    /* Fragment shaders use real, 32-bit twos-complement integers for all
 564     * integer types.
 565     */
 566    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
 567    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
 568    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
 569    ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 570    ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 571
 572    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
 573    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
 574    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
 575    ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 576    ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 577
 578    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
 579     * but we're not sure how it's actually done for vertex order,
 580     * that affect provoking vertex decision. Always use last vertex
 581     * convention for quad primitive which works as expected for now.
 582     */
 583    if (brw->gen >= 6)
 584       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 585
 586    ctx->Const.NativeIntegers = true;
 587    ctx->Const.VertexID_is_zero_based = true;
 588
 589    /* Regarding the CMP instruction, the Ivybridge PRM says:
 590     *
 591     *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
 592     *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
 593     *    0xFFFFFFFF) is assigned to dst."
 594     *
 595     * but PRMs for earlier generations say
 596     *
 597     *   "In dword format, one GRF may store up to 8 results. When the register
 598     *    is used later as a vector of Booleans, as only LSB at each channel
 599     *    contains meaning [sic] data, software should make sure all higher bits
 600     *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
 601     *
 602     * We select the representation of a true boolean uniform to be ~0, and fix
 603     * the results of Gen <= 5 CMP instruction's with -(result & 1).
 604     */
 605    ctx->Const.UniformBooleanTrue = ~0;
 606
 607    /* From the gen4 PRM, volume 4 page 127:
 608     *
 609     *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
 610     *      the base address of the first element of the surface, computed in
 611     *      software by adding the surface base address to the byte offset of
 612     *      the element in the buffer."
 613     *
 614     * However, unaligned accesses are slower, so enforce buffer alignment.
 615     */
 616    ctx->Const.UniformBufferOffsetAlignment = 16;
 617
 618    /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
 619     * that we can safely have the CPU and GPU writing the same SSBO on
 620     * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
 621     * writes, so there's no problem. For an SSBO, the GPU and the CPU can
 622     * be updating disjoint regions of the buffer simultaneously and that will
 623     * break if the regions overlap the same cacheline.
 624     */
 625    ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
 626    ctx->Const.TextureBufferOffsetAlignment = 16;
 627    ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 628
 629    if (brw->gen >= 6) {
 630       ctx->Const.MaxVarying = 32;
 631       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
 632       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
 633       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
 634       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
 635       ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
 636       ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
 637       ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
 638       ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
 639    }
 640
 641    /* We want the GLSL compiler to emit code that uses condition codes */
 642    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 643       ctx->Const.ShaderCompilerOptions[i] =
 644          brw->intelScreen->compiler->glsl_compiler_options[i];
 645    }
 646
 647    if (brw->gen >= 7) {
 648       ctx->Const.MaxViewportWidth = 32768;
 649       ctx->Const.MaxViewportHeight = 32768;
 650    }
 651
 652    /* ARB_viewport_array */
 653    if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
 654       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
 655       ctx->Const.ViewportSubpixelBits = 0;
 656
 657       /* Cast to float before negating because MaxViewportWidth is unsigned.
 658        */
 659       ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
 660       ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
 661    }
 662
 663    /* ARB_gpu_shader5 */
 664    if (brw->gen >= 7)
 665       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
 666
 667    /* ARB_framebuffer_no_attachments */
 668    ctx->Const.MaxFramebufferWidth = 16384;
 669    ctx->Const.MaxFramebufferHeight = 16384;
 670    ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
 671    ctx->Const.MaxFramebufferSamples = max_samples;
 672 }
 673
 674 static void
 675 brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
 676 {
 677    struct gl_context *ctx = &brw->ctx;
 678
 679    /* For ES, we set these constants based on SIMD8.
 680     *
 681     * TODO: Once we can always generate SIMD16, we should update this.
 682     *
 683     * For GL, we assume we can generate a SIMD16 program, but this currently
 684     * is not always true. This allows us to run more test cases, and will be
 685     * required based on desktop GL compute shader requirements.
 686     */
 687    const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
 688
 689    const uint32_t max_invocations = simd_size * max_threads;
 690    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
 691    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
 692    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
 693    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
 694    ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
 695 }
 696
 697 /**
 698  * Process driconf (drirc) options, setting appropriate context flags.
 699  *
 700  * intelInitExtensions still pokes at optionCache directly, in order to
 701  * avoid advertising various extensions.  No flags are set, so it makes
 702  * sense to continue doing that there.
 703  */
 704 static void
 705 brw_process_driconf_options(struct brw_context *brw)
 706 {
 707    struct gl_context *ctx = &brw->ctx;
 708
 709    driOptionCache *options = &brw->optionCache;
 710    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 711                        brw->driContext->driScreenPriv->myNum, "i965");
 712
 713    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 714    switch (bo_reuse_mode) {
 715    case DRI_CONF_BO_REUSE_DISABLED:
 716       break;
 717    case DRI_CONF_BO_REUSE_ALL:
 718       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 719       break;
 720    }
 721
 722    if (!driQueryOptionb(options, "hiz")) {
 723        brw->has_hiz = false;
 724        /* On gen6, you can only do separate stencil with HIZ. */
 725        if (brw->gen == 6)
 726           brw->has_separate_stencil = false;
 727    }
 728
 729    if (driQueryOptionb(options, "always_flush_batch")) {
 730       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 731       brw->always_flush_batch = true;
 732    }
 733
 734    if (driQueryOptionb(options, "always_flush_cache")) {
 735       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 736       brw->always_flush_cache = true;
 737    }
 738
 739    if (driQueryOptionb(options, "disable_throttling")) {
 740       fprintf(stderr, "disabling flush throttling\n");
 741       brw->disable_throttling = true;
 742    }
 743
 744    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 745
 746    ctx->Const.ForceGLSLExtensionsWarn =
 747       driQueryOptionb(options, "force_glsl_extensions_warn");
 748
 749    ctx->Const.DisableGLSLLineContinuations =
 750       driQueryOptionb(options, "disable_glsl_line_continuations");
 751
 752    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 753       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 754
 755    brw->dual_color_blend_by_location =
 756       driQueryOptionb(options, "dual_color_blend_by_location");
 757 }
 758
 759 GLboolean
 760 brwCreateContext(gl_api api,
 761                  const struct gl_config *mesaVis,
 762                  __DRIcontext *driContextPriv,
 763                  unsigned major_version,
 764                  unsigned minor_version,
 765                  uint32_t flags,
 766                  bool notify_reset,
 767                  unsigned *dri_ctx_error,
 768                  void *sharedContextPrivate)
 769 {
 770    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 771    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 772    struct intel_screen *screen = sPriv->driverPrivate;
 773    const struct brw_device_info *devinfo = screen->devinfo;
 774    struct dd_function_table functions;
 775
 776    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 777     * provides us with context reset notifications.
 778     */
 779    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 780       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 781
 782    if (screen->has_context_reset_notification)
 783       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 784
 785    if (flags & ~allowed_flags) {
 786       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 787       return false;
 788    }
 789
 790    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 791    if (!brw) {
 792       fprintf(stderr, "%s: failed to alloc context\n", __func__);
 793       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 794       return false;
 795    }
 796
 797    driContextPriv->driverPrivate = brw;
 798    brw->driContext = driContextPriv;
 799    brw->intelScreen = screen;
 800    brw->bufmgr = screen->bufmgr;
 801
 802    brw->gen = devinfo->gen;
 803    brw->gt = devinfo->gt;
 804    brw->is_g4x = devinfo->is_g4x;
 805    brw->is_baytrail = devinfo->is_baytrail;
 806    brw->is_haswell = devinfo->is_haswell;
 807    brw->is_cherryview = devinfo->is_cherryview;
 808    brw->is_broxton = devinfo->is_broxton;
 809    brw->has_llc = devinfo->has_llc;
 810    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 811    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 812    brw->has_pln = devinfo->has_pln;
 813    brw->has_compr4 = devinfo->has_compr4;
 814    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 815    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 816    brw->needs_unlit_centroid_workaround =
 817       devinfo->needs_unlit_centroid_workaround;
 818
 819    brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
 820    brw->has_swizzling = screen->hw_has_swizzling;
 821
 822    brw->vs.base.stage = MESA_SHADER_VERTEX;
 823    brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
 824    brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
 825    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 826    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 827    if (brw->gen >= 8) {
 828       gen8_init_vtable_surface_functions(brw);
 829       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 830    } else if (brw->gen >= 7) {
 831       gen7_init_vtable_surface_functions(brw);
 832       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 833    } else if (brw->gen >= 6) {
 834       gen6_init_vtable_surface_functions(brw);
 835       brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
 836    } else {
 837       gen4_init_vtable_surface_functions(brw);
 838       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 839    }
 840
 841    brw_init_driver_functions(brw, &functions);
 842
 843    if (notify_reset)
 844       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 845
 846    struct gl_context *ctx = &brw->ctx;
 847
 848    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 849       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 850       fprintf(stderr, "%s: failed to init mesa context\n", __func__);
 851       intelDestroyContext(driContextPriv);
 852       return false;
 853    }
 854
 855    driContextSetFlags(ctx, flags);
 856
 857    /* Initialize the software rasterizer and helper modules.
 858     *
 859     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 860     * software fallbacks (which we have to support on legacy GL to do weird
 861     * glDrawPixels(), glBitmap(), and other functions).
 862     */
 863    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 864       _swrast_CreateContext(ctx);
 865    }
 866
 867    _vbo_CreateContext(ctx);
 868    if (ctx->swrast_context) {
 869       _tnl_CreateContext(ctx);
 870       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 871       _swsetup_CreateContext(ctx);
 872
 873       /* Configure swrast to match hardware characteristics: */
 874       _swrast_allow_pixel_fog(ctx, false);
 875       _swrast_allow_vertex_fog(ctx, true);
 876    }
 877
 878    _mesa_meta_init(ctx);
 879
 880    brw_process_driconf_options(brw);
 881
 882    if (INTEL_DEBUG & DEBUG_PERF)
 883       brw->perf_debug = true;
 884
 885    brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
 886    brw_initialize_context_constants(brw);
 887
 888    ctx->Const.ResetStrategy = notify_reset
 889       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 890
 891    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 892    _mesa_init_point(ctx);
 893
 894    intel_fbo_init(brw);
 895
 896    intel_batchbuffer_init(brw);
 897
 898    if (brw->gen >= 6) {
 899       /* Create a new hardware context.  Using a hardware context means that
 900        * our GPU state will be saved/restored on context switch, allowing us
 901        * to assume that the GPU is in the same state we left it in.
 902        *
 903        * This is required for transform feedback buffer offsets, query objects,
 904        * and also allows us to reduce how much state we have to emit.
 905        */
 906       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 907
 908       if (!brw->hw_ctx) {
 909          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 910          intelDestroyContext(driContextPriv);
 911          return false;
 912       }
 913    }
 914
 915    if (brw_init_pipe_control(brw, devinfo)) {
 916       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 917       intelDestroyContext(driContextPriv);
 918       return false;
 919    }
 920
 921    brw_init_state(brw);
 922
 923    intelInitExtensions(ctx);
 924
 925    brw_init_surface_formats(brw);
 926
 927    brw->max_vs_threads = devinfo->max_vs_threads;
 928    brw->max_hs_threads = devinfo->max_hs_threads;
 929    brw->max_ds_threads = devinfo->max_ds_threads;
 930    brw->max_gs_threads = devinfo->max_gs_threads;
 931    brw->max_wm_threads = devinfo->max_wm_threads;
 932    /* FINISHME: Do this for all platforms that the kernel supports */
 933    if (brw->is_cherryview &&
 934        screen->subslice_total > 0 && screen->eu_total > 0) {
 935       /* Logical CS threads = EUs per subslice * 7 threads per EU */
 936       brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
 937    } else {
 938       brw->max_cs_threads = devinfo->max_cs_threads;
 939    }
 940    brw->urb.size = devinfo->urb.size;
 941    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 942    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 943    brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
 944    brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
 945    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 946
 947    /* Estimate the size of the mappable aperture into the GTT.  There's an
 948     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 949     * It turns out it's basically always 256MB, though some ancient hardware
 950     * was smaller.
 951     */
 952    uint32_t gtt_size = 256 * 1024 * 1024;
 953
 954    /* We don't want to map two objects such that a memcpy between them would
 955     * just fault one mapping in and then the other over and over forever.  So
 956     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 957     * taken up by things like the framebuffer and the ringbuffer and such, so
 958     * be more conservative.
 959     */
 960    brw->max_gtt_map_object_size = gtt_size / 4;
 961
 962    if (brw->gen == 6)
 963       brw->urb.gs_present = false;
 964
 965    brw->prim_restart.in_progress = false;
 966    brw->prim_restart.enable_cut_index = false;
 967    brw->gs.enabled = false;
 968    brw->sf.viewport_transform_enable = true;
 969
 970    brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
 971
 972    brw->use_resource_streamer = screen->has_resource_streamer &&
 973       (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
 974        env_var_as_boolean("INTEL_USE_GATHER", false));
 975
 976    ctx->VertexProgram._MaintainTnlProgram = true;
 977    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 978
 979    brw_draw_init( brw );
 980
 981    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 982       /* Turn on some extra GL_ARB_debug_output generation. */
 983       brw->perf_debug = true;
 984    }
 985
 986    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 987       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 988
 989    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 990       brw_init_shader_time(brw);
 991
 992    _mesa_compute_version(ctx);
 993
 994    _mesa_initialize_dispatch_tables(ctx);
 995    _mesa_initialize_vbo_vtxfmt(ctx);
 996
 997    if (ctx->Extensions.AMD_performance_monitor) {
 998       brw_init_performance_monitors(brw);
 999    }
1000
1001    vbo_use_buffer_objects(ctx);
1002    vbo_always_unmap_buffers(ctx);
1003
1004    return true;
1005 }
1006
1007 void
1008 intelDestroyContext(__DRIcontext * driContextPriv)
1009 {
1010    struct brw_context *brw =
1011       (struct brw_context *) driContextPriv->driverPrivate;
1012    struct gl_context *ctx = &brw->ctx;
1013
1014    /* Dump a final BMP in case the application doesn't call SwapBuffers */
1015    if (INTEL_DEBUG & DEBUG_AUB) {
1016       intel_batchbuffer_flush(brw);
1017       aub_dump_bmp(&brw->ctx);
1018    }
1019
1020    _mesa_meta_free(&brw->ctx);
1021    brw_meta_fast_clear_free(brw);
1022
1023    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1024       /* Force a report. */
1025       brw->shader_time.report_time = 0;
1026
1027       brw_collect_and_report_shader_time(brw);
1028       brw_destroy_shader_time(brw);
1029    }
1030
1031    brw_destroy_state(brw);
1032    brw_draw_destroy(brw);
1033
1034    drm_intel_bo_unreference(brw->curbe.curbe_bo);
1035    if (brw->vs.base.scratch_bo)
1036       drm_intel_bo_unreference(brw->vs.base.scratch_bo);
1037    if (brw->gs.base.scratch_bo)
1038       drm_intel_bo_unreference(brw->gs.base.scratch_bo);
1039    if (brw->wm.base.scratch_bo)
1040       drm_intel_bo_unreference(brw->wm.base.scratch_bo);
1041
1042    gen7_reset_hw_bt_pool_offsets(brw);
1043    drm_intel_bo_unreference(brw->hw_bt_pool.bo);
1044    brw->hw_bt_pool.bo = NULL;
1045
1046    drm_intel_gem_context_destroy(brw->hw_ctx);
1047
1048    if (ctx->swrast_context) {
1049       _swsetup_DestroyContext(&brw->ctx);
1050       _tnl_DestroyContext(&brw->ctx);
1051    }
1052    _vbo_DestroyContext(&brw->ctx);
1053
1054    if (ctx->swrast_context)
1055       _swrast_DestroyContext(&brw->ctx);
1056
1057    brw_fini_pipe_control(brw);
1058    intel_batchbuffer_free(brw);
1059
1060    drm_intel_bo_unreference(brw->throttle_batch[1]);
1061    drm_intel_bo_unreference(brw->throttle_batch[0]);
1062    brw->throttle_batch[1] = NULL;
1063    brw->throttle_batch[0] = NULL;
1064
1065    driDestroyOptionCache(&brw->optionCache);
1066
1067    /* free the Mesa context */
1068    _mesa_free_context_data(&brw->ctx);
1069
1070    ralloc_free(brw);
1071    driContextPriv->driverPrivate = NULL;
1072 }
1073
1074 GLboolean
1075 intelUnbindContext(__DRIcontext * driContextPriv)
1076 {
1077    /* Unset current context and dispath table */
1078    _mesa_make_current(NULL, NULL, NULL);
1079
1080    return true;
1081 }
1082
1083 /**
1084  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
1085  * on window system framebuffers.
1086  *
1087  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1088  * your renderbuffer can do sRGB encode, and you can flip a switch that does
1089  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
1090  * for a visual where you're guaranteed to be capable, but it turns out that
1091  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1092  * incapable ones, because there's no difference between the two in resources
1093  * used.  Applications thus get built that accidentally rely on the default
1094  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
1095  * great...
1096  *
1097  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1098  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1099  * So they removed the enable knob and made it "if the renderbuffer is sRGB
1100  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
1101  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1102  * and get no sRGB encode (assuming that both kinds of visual are available).
1103  * Thus our choice to support sRGB by default on our visuals for desktop would
1104  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1105  *
1106  * Unfortunately, renderbuffer setup happens before a context is created.  So
1107  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1108  * context (without an sRGB visual, though we don't have sRGB visuals exposed
1109  * yet), we go turn that back off before anyone finds out.
1110  */
1111 static void
1112 intel_gles3_srgb_workaround(struct brw_context *brw,
1113                             struct gl_framebuffer *fb)
1114 {
1115    struct gl_context *ctx = &brw->ctx;
1116
1117    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1118       return;
1119
1120    /* Some day when we support the sRGB capable bit on visuals available for
1121     * GLES, we'll need to respect that and not disable things here.
1122     */
1123    fb->Visual.sRGBCapable = false;
1124    for (int i = 0; i < BUFFER_COUNT; i++) {
1125       if (fb->Attachment[i].Renderbuffer &&
1126           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
1127          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
1128       }
1129    }
1130 }
1131
1132 GLboolean
1133 intelMakeCurrent(__DRIcontext * driContextPriv,
1134                  __DRIdrawable * driDrawPriv,
1135                  __DRIdrawable * driReadPriv)
1136 {
1137    struct brw_context *brw;
1138    GET_CURRENT_CONTEXT(curCtx);
1139
1140    if (driContextPriv)
1141       brw = (struct brw_context *) driContextPriv->driverPrivate;
1142    else
1143       brw = NULL;
1144
1145    /* According to the glXMakeCurrent() man page: "Pending commands to
1146     * the previous context, if any, are flushed before it is released."
1147     * But only flush if we're actually changing contexts.
1148     */
1149    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
1150       _mesa_flush(curCtx);
1151    }
1152
1153    if (driContextPriv) {
1154       struct gl_context *ctx = &brw->ctx;
1155       struct gl_framebuffer *fb, *readFb;
1156
1157       if (driDrawPriv == NULL) {
1158          fb = _mesa_get_incomplete_framebuffer();
1159       } else {
1160          fb = driDrawPriv->driverPrivate;
1161          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1162       }
1163
1164       if (driReadPriv == NULL) {
1165          readFb = _mesa_get_incomplete_framebuffer();
1166       } else {
1167          readFb = driReadPriv->driverPrivate;
1168          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1169       }
1170
1171       /* The sRGB workaround changes the renderbuffer's format. We must change
1172        * the format before the renderbuffer's miptree get's allocated, otherwise
1173        * the formats of the renderbuffer and its miptree will differ.
1174        */
1175       intel_gles3_srgb_workaround(brw, fb);
1176       intel_gles3_srgb_workaround(brw, readFb);
1177
1178       /* If the context viewport hasn't been initialized, force a call out to
1179        * the loader to get buffers so we have a drawable size for the initial
1180        * viewport. */
1181       if (!brw->ctx.ViewportInitialized)
1182          intel_prepare_render(brw);
1183
1184       _mesa_make_current(ctx, fb, readFb);
1185    } else {
1186       _mesa_make_current(NULL, NULL, NULL);
1187    }
1188
1189    return true;
1190 }
1191
1192 void
1193 intel_resolve_for_dri2_flush(struct brw_context *brw,
1194                              __DRIdrawable *drawable)
1195 {
1196    if (brw->gen < 6) {
1197       /* MSAA and fast color clear are not supported, so don't waste time
1198        * checking whether a resolve is needed.
1199        */
1200       return;
1201    }
1202
1203    struct gl_framebuffer *fb = drawable->driverPrivate;
1204    struct intel_renderbuffer *rb;
1205
1206    /* Usually, only the back buffer will need to be downsampled. However,
1207     * the front buffer will also need it if the user has rendered into it.
1208     */
1209    static const gl_buffer_index buffers[2] = {
1210          BUFFER_BACK_LEFT,
1211          BUFFER_FRONT_LEFT,
1212    };
1213
1214    for (int i = 0; i < 2; ++i) {
1215       rb = intel_get_renderbuffer(fb, buffers[i]);
1216       if (rb == NULL || rb->mt == NULL)
1217          continue;
1218       if (rb->mt->num_samples <= 1)
1219          intel_miptree_resolve_color(brw, rb->mt, 0);
1220       else
1221          intel_renderbuffer_downsample(brw, rb);
1222    }
1223 }
1224
/** Size of one pixel of the renderbuffer's format, in bits. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   /* Format size is reported in bytes; scale to bits. */
   return 8 * _mesa_get_format_bytes(intel_rb_format(rb));
}
1230
1231 static void
1232 intel_query_dri2_buffers(struct brw_context *brw,
1233                          __DRIdrawable *drawable,
1234                          __DRIbuffer **buffers,
1235                          int *count);
1236
1237 static void
1238 intel_process_dri2_buffer(struct brw_context *brw,
1239                           __DRIdrawable *drawable,
1240                           __DRIbuffer *buffer,
1241                           struct intel_renderbuffer *rb,
1242                           const char *buffer_name);
1243
1244 static void
1245 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1246
1247 static void
1248 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1249 {
1250    struct gl_framebuffer *fb = drawable->driverPrivate;
1251    struct intel_renderbuffer *rb;
1252    __DRIbuffer *buffers = NULL;
1253    int i, count;
1254    const char *region_name;
1255
1256    /* Set this up front, so that in case our buffers get invalidated
1257     * while we're getting new buffers, we don't clobber the stamp and
1258     * thus ignore the invalidate. */
1259    drawable->lastStamp = drawable->dri2.stamp;
1260
1261    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1262       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1263
1264    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1265
1266    if (buffers == NULL)
1267       return;
1268
1269    for (i = 0; i < count; i++) {
1270        switch (buffers[i].attachment) {
1271        case __DRI_BUFFER_FRONT_LEFT:
1272            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1273            region_name = "dri2 front buffer";
1274            break;
1275
1276        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1277            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1278            region_name = "dri2 fake front buffer";
1279            break;
1280
1281        case __DRI_BUFFER_BACK_LEFT:
1282            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1283            region_name = "dri2 back buffer";
1284            break;
1285
1286        case __DRI_BUFFER_DEPTH:
1287        case __DRI_BUFFER_HIZ:
1288        case __DRI_BUFFER_DEPTH_STENCIL:
1289        case __DRI_BUFFER_STENCIL:
1290        case __DRI_BUFFER_ACCUM:
1291        default:
1292            fprintf(stderr,
1293                    "unhandled buffer attach event, attachment type %d\n",
1294                    buffers[i].attachment);
1295            return;
1296        }
1297
1298        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1299    }
1300
1301 }
1302
1303 void
1304 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1305 {
1306    struct brw_context *brw = context->driverPrivate;
1307    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1308
1309    /* Set this up front, so that in case our buffers get invalidated
1310     * while we're getting new buffers, we don't clobber the stamp and
1311     * thus ignore the invalidate. */
1312    drawable->lastStamp = drawable->dri2.stamp;
1313
1314    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1315       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1316
1317    if (screen->image.loader)
1318       intel_update_image_buffers(brw, drawable);
1319    else
1320       intel_update_dri2_buffers(brw, drawable);
1321
1322    driUpdateFramebufferSize(&brw->ctx, drawable);
1323 }
1324
1325 /**
1326  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1327  * state is required.
1328  */
1329 void
1330 intel_prepare_render(struct brw_context *brw)
1331 {
1332    struct gl_context *ctx = &brw->ctx;
1333    __DRIcontext *driContext = brw->driContext;
1334    __DRIdrawable *drawable;
1335
1336    drawable = driContext->driDrawablePriv;
1337    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1338       if (drawable->lastStamp != drawable->dri2.stamp)
1339          intel_update_renderbuffers(driContext, drawable);
1340       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1341    }
1342
1343    drawable = driContext->driReadablePriv;
1344    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1345       if (drawable->lastStamp != drawable->dri2.stamp)
1346          intel_update_renderbuffers(driContext, drawable);
1347       driContext->dri2.read_stamp = drawable->dri2.stamp;
1348    }
1349
1350    /* If we're currently rendering to the front buffer, the rendering
1351     * that will happen next will probably dirty the front buffer.  So
1352     * mark it as dirty here.
1353     */
1354    if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1355       brw->front_buffer_dirty = true;
1356 }
1357
1358 /**
1359  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1360  *
1361  * To determine which DRI buffers to request, examine the renderbuffers
1362  * attached to the drawable's framebuffer. Then request the buffers with
1363  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1364  *
1365  * This is called from intel_update_renderbuffers().
1366  *
1367  * \param drawable      Drawable whose buffers are queried.
1368  * \param buffers       [out] List of buffers returned by DRI2 query.
1369  * \param buffer_count  [out] Number of buffers returned.
1370  *
1371  * \see intel_update_renderbuffers()
1372  * \see DRI2GetBuffers()
1373  * \see DRI2GetBuffersWithFormat()
1374  */
1375 static void
1376 intel_query_dri2_buffers(struct brw_context *brw,
1377                          __DRIdrawable *drawable,
1378                          __DRIbuffer **buffers,
1379                          int *buffer_count)
1380 {
1381    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1382    struct gl_framebuffer *fb = drawable->driverPrivate;
1383    int i = 0;
1384    unsigned attachments[8];
1385
1386    struct intel_renderbuffer *front_rb;
1387    struct intel_renderbuffer *back_rb;
1388
1389    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1390    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1391
1392    memset(attachments, 0, sizeof(attachments));
1393    if ((_mesa_is_front_buffer_drawing(fb) ||
1394         _mesa_is_front_buffer_reading(fb) ||
1395         !back_rb) && front_rb) {
1396       /* If a fake front buffer is in use, then querying for
1397        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1398        * the real front buffer to the fake front buffer.  So before doing the
1399        * query, we need to make sure all the pending drawing has landed in the
1400        * real front buffer.
1401        */
1402       intel_batchbuffer_flush(brw);
1403       intel_flush_front(&brw->ctx);
1404
1405       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1406       attachments[i++] = intel_bits_per_pixel(front_rb);
1407    } else if (front_rb && brw->front_buffer_dirty) {
1408       /* We have pending front buffer rendering, but we aren't querying for a
1409        * front buffer.  If the front buffer we have is a fake front buffer,
1410        * the X server is going to throw it away when it processes the query.
1411        * So before doing the query, make sure all the pending drawing has
1412        * landed in the real front buffer.
1413        */
1414       intel_batchbuffer_flush(brw);
1415       intel_flush_front(&brw->ctx);
1416    }
1417
1418    if (back_rb) {
1419       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1420       attachments[i++] = intel_bits_per_pixel(back_rb);
1421    }
1422
1423    assert(i <= ARRAY_SIZE(attachments));
1424
1425    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1426                                                         &drawable->w,
1427                                                         &drawable->h,
1428                                                         attachments, i / 2,
1429                                                         buffer_count,
1430                                                         drawable->loaderPrivate);
1431 }
1432
1433 /**
1434  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1435  *
1436  * This is called from intel_update_renderbuffers().
1437  *
1438  * \par Note:
1439  *    DRI buffers whose attachment point is DRI2BufferStencil or
1440  *    DRI2BufferDepthStencil are handled as special cases.
1441  *
1442  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1443  *        that is passed to drm_intel_bo_gem_create_from_name().
1444  *
1445  * \see intel_update_renderbuffers()
1446  */
1447 static void
1448 intel_process_dri2_buffer(struct brw_context *brw,
1449                           __DRIdrawable *drawable,
1450                           __DRIbuffer *buffer,
1451                           struct intel_renderbuffer *rb,
1452                           const char *buffer_name)
1453 {
1454    struct gl_framebuffer *fb = drawable->driverPrivate;
1455    drm_intel_bo *bo;
1456
1457    if (!rb)
1458       return;
1459
1460    unsigned num_samples = rb->Base.Base.NumSamples;
1461
1462    /* We try to avoid closing and reopening the same BO name, because the first
1463     * use of a mapping of the buffer involves a bunch of page faulting which is
1464     * moderately expensive.
1465     */
1466    struct intel_mipmap_tree *last_mt;
1467    if (num_samples == 0)
1468       last_mt = rb->mt;
1469    else
1470       last_mt = rb->singlesample_mt;
1471
1472    uint32_t old_name = 0;
1473    if (last_mt) {
1474        /* The bo already has a name because the miptree was created by a
1475         * previous call to intel_process_dri2_buffer(). If a bo already has a
1476         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1477         * create a new name.
1478         */
1479       drm_intel_bo_flink(last_mt->bo, &old_name);
1480    }
1481
1482    if (old_name == buffer->name)
1483       return;
1484
1485    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1486       fprintf(stderr,
1487               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1488               buffer->name, buffer->attachment,
1489               buffer->cpp, buffer->pitch);
1490    }
1491
1492    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1493                                           buffer->name);
1494    if (!bo) {
1495       fprintf(stderr,
1496               "Failed to open BO for returned DRI2 buffer "
1497               "(%dx%d, %s, named %d).\n"
1498               "This is likely a bug in the X Server that will lead to a "
1499               "crash soon.\n",
1500               drawable->w, drawable->h, buffer_name, buffer->name);
1501       return;
1502    }
1503
1504    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1505                                             drawable->w, drawable->h,
1506                                             buffer->pitch);
1507
1508    if (_mesa_is_front_buffer_drawing(fb) &&
1509        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1510         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1511        rb->Base.Base.NumSamples > 1) {
1512       intel_renderbuffer_upsample(brw, rb);
1513    }
1514
1515    assert(rb->mt);
1516
1517    drm_intel_bo_unreference(bo);
1518 }
1519
1520 /**
1521  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1522  *
1523  * To determine which DRI buffers to request, examine the renderbuffers
1524  * attached to the drawable's framebuffer. Then request the buffers from
1525  * the image loader
1526  *
1527  * This is called from intel_update_renderbuffers().
1528  *
1529  * \param drawable      Drawable whose buffers are queried.
1530  * \param buffers       [out] List of buffers returned by DRI2 query.
1531  * \param buffer_count  [out] Number of buffers returned.
1532  *
1533  * \see intel_update_renderbuffers()
1534  */
1535
1536 static void
1537 intel_update_image_buffer(struct brw_context *intel,
1538                           __DRIdrawable *drawable,
1539                           struct intel_renderbuffer *rb,
1540                           __DRIimage *buffer,
1541                           enum __DRIimageBufferMask buffer_type)
1542 {
1543    struct gl_framebuffer *fb = drawable->driverPrivate;
1544
1545    if (!rb || !buffer->bo)
1546       return;
1547
1548    unsigned num_samples = rb->Base.Base.NumSamples;
1549
1550    /* Check and see if we're already bound to the right
1551     * buffer object
1552     */
1553    struct intel_mipmap_tree *last_mt;
1554    if (num_samples == 0)
1555       last_mt = rb->mt;
1556    else
1557       last_mt = rb->singlesample_mt;
1558
1559    if (last_mt && last_mt->bo == buffer->bo)
1560       return;
1561
1562    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1563                                             buffer->width, buffer->height,
1564                                             buffer->pitch);
1565
1566    if (_mesa_is_front_buffer_drawing(fb) &&
1567        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1568        rb->Base.Base.NumSamples > 1) {
1569       intel_renderbuffer_upsample(intel, rb);
1570    }
1571 }
1572
1573 static void
1574 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1575 {
1576    struct gl_framebuffer *fb = drawable->driverPrivate;
1577    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1578    struct intel_renderbuffer *front_rb;
1579    struct intel_renderbuffer *back_rb;
1580    struct __DRIimageList images;
1581    unsigned int format;
1582    uint32_t buffer_mask = 0;
1583
1584    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1585    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1586
1587    if (back_rb)
1588       format = intel_rb_format(back_rb);
1589    else if (front_rb)
1590       format = intel_rb_format(front_rb);
1591    else
1592       return;
1593
1594    if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1595                     _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1596       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1597    }
1598
1599    if (back_rb)
1600       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1601
1602    (*screen->image.loader->getBuffers) (drawable,
1603                                         driGLFormatToImageFormat(format),
1604                                         &drawable->dri2.stamp,
1605                                         drawable->loaderPrivate,
1606                                         buffer_mask,
1607                                         &images);
1608
1609    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1610       drawable->w = images.front->width;
1611       drawable->h = images.front->height;
1612       intel_update_image_buffer(brw,
1613                                 drawable,
1614                                 front_rb,
1615                                 images.front,
1616                                 __DRI_IMAGE_BUFFER_FRONT);
1617    }
1618    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1619       drawable->w = images.back->width;
1620       drawable->h = images.back->height;
1621       intel_update_image_buffer(brw,
1622                                 drawable,
1623                                 back_rb,
1624                                 images.back,
1625                                 __DRI_IMAGE_BUFFER_BACK);
1626    }
1627 }