src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keith@tungstengraphics.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/imports.h"
  38 #include "main/macros.h"
  39 #include "main/points.h"
  40 #include "main/simple_list.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43
  44 #include "vbo/vbo_context.h"
  45
  46 #include "drivers/common/driverfuncs.h"
  47 #include "drivers/common/meta.h"
  48 #include "utils.h"
  49
  50 #include "brw_context.h"
  51 #include "brw_defines.h"
  52 #include "brw_draw.h"
  53 #include "brw_state.h"
  54
  55 #include "intel_batchbuffer.h"
  56 #include "intel_buffer_objects.h"
  57 #include "intel_buffers.h"
  58 #include "intel_fbo.h"
  59 #include "intel_mipmap_tree.h"
  60 #include "intel_pixel.h"
  61 #include "intel_regions.h"
  62 #include "intel_tex.h"
  63 #include "intel_tex_obj.h"
  64
  65 #include "swrast_setup/swrast_setup.h"
  66 #include "tnl/tnl.h"
  67 #include "tnl/t_pipeline.h"
  68 #include "glsl/ralloc.h"
  69
  70 /***************************************
  71  * Mesa's Driver Functions
  72  ***************************************/
  73
  74 static size_t
  75 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  76                              GLenum internalFormat, int samples[16])
  77 {
  78    struct brw_context *brw = brw_context(ctx);
  79
  80    (void) target;
  81
  82    switch (brw->gen) {
  83    case 7:
  84       samples[0] = 8;
  85       samples[1] = 4;
  86       return 2;
  87
  88    case 6:
  89       samples[0] = 4;
  90       return 1;
  91
  92    default:
  93       samples[0] = 1;
  94       return 1;
  95    }
  96 }
  97
  98 static const GLubyte *
  99 intelGetString(struct gl_context * ctx, GLenum name)
 100 {
 101    const struct brw_context *const brw = brw_context(ctx);
 102    const char *chipset;
 103    static char buffer[128];
 104
 105    switch (name) {
 106    case GL_VENDOR:
 107       return (GLubyte *) "Intel Open Source Technology Center";
 108       break;
 109
 110    case GL_RENDERER:
 111       switch (brw->intelScreen->deviceID) {
 112 #undef CHIPSET
 113 #define CHIPSET(id, family, str) case id: chipset = str; break;
 114 #include "pci_ids/i965_pci_ids.h"
 115       default:
 116          chipset = "Unknown Intel Chipset";
 117          break;
 118       }
 119
 120       (void) driGetRendererString(buffer, chipset, 0);
 121       return (GLubyte *) buffer;
 122
 123    default:
 124       return NULL;
 125    }
 126 }
 127
 128 static void
 129 intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
 130 {
 131    struct brw_context *brw = brw_context(ctx);
 132    __DRIcontext *driContext = brw->driContext;
 133
 134    if (brw->saved_viewport)
 135       brw->saved_viewport(ctx, x, y, w, h);
 136
 137    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 138       dri2InvalidateDrawable(driContext->driDrawablePriv);
 139       dri2InvalidateDrawable(driContext->driReadablePriv);
 140    }
 141 }
 142
 143 static void
 144 intelInvalidateState(struct gl_context * ctx, GLuint new_state)
 145 {
 146    struct brw_context *brw = brw_context(ctx);
 147
 148    if (ctx->swrast_context)
 149       _swrast_InvalidateState(ctx, new_state);
 150    _vbo_InvalidateState(ctx, new_state);
 151
 152    brw->NewGLState |= new_state;
 153 }
 154
 155 static void
 156 intel_flush_front(struct gl_context *ctx)
 157 {
 158    struct brw_context *brw = brw_context(ctx);
 159    __DRIcontext *driContext = brw->driContext;
 160    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 161    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 162
 163    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 164       if (screen->dri2.loader->flushFrontBuffer != NULL &&
 165           driDrawable &&
 166           driDrawable->loaderPrivate) {
 167
 168          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 169           *
 170           * This potentially resolves both front and back buffer. It
 171           * is unnecessary to resolve the back, but harms nothing except
 172           * performance. And no one cares about front-buffer render
 173           * performance.
 174           */
 175          intel_resolve_for_dri2_flush(brw, driDrawable);
 176          intel_batchbuffer_flush(brw);
 177
 178          screen->dri2.loader->flushFrontBuffer(driDrawable,
 179                                                driDrawable->loaderPrivate);
 180
 181          /* We set the dirty bit in intel_prepare_render() if we're
 182           * front buffer rendering once we get there.
 183           */
 184          brw->front_buffer_dirty = false;
 185       }
 186    }
 187 }
 188
 189 static void
 190 intel_glFlush(struct gl_context *ctx)
 191 {
 192    struct brw_context *brw = brw_context(ctx);
 193
 194    intel_batchbuffer_flush(brw);
 195    intel_flush_front(ctx);
 196    if (brw->is_front_buffer_rendering)
 197       brw->need_throttle = true;
 198 }
 199
 200 void
 201 intelFinish(struct gl_context * ctx)
 202 {
 203    struct brw_context *brw = brw_context(ctx);
 204
 205    intel_glFlush(ctx);
 206
 207    if (brw->batch.last_bo)
 208       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 209 }
 210
 211 static void
 212 brw_init_driver_functions(struct brw_context *brw,
 213                           struct dd_function_table *functions)
 214 {
 215    _mesa_init_driver_functions(functions);
 216
 217    /* GLX uses DRI2 invalidate events to handle window resizing.
 218     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 219     * which doesn't provide a mechanism for snooping the event queues.
 220     *
 221     * So EGL still relies on viewport hacks to handle window resizing.
 222     * This should go away with DRI3000.
 223     */
 224    if (!brw->driContext->driScreenPriv->dri2.useInvalidate) {
 225       brw->saved_viewport = functions->Viewport;
 226       functions->Viewport = intel_viewport;
 227    }
 228
 229    functions->Flush = intel_glFlush;
 230    functions->Finish = intelFinish;
 231    functions->GetString = intelGetString;
 232    functions->UpdateState = intelInvalidateState;
 233
 234    intelInitTextureFuncs(functions);
 235    intelInitTextureImageFuncs(functions);
 236    intelInitTextureSubImageFuncs(functions);
 237    intelInitTextureCopyImageFuncs(functions);
 238    intelInitClearFuncs(functions);
 239    intelInitBufferFuncs(functions);
 240    intelInitPixelFuncs(functions);
 241    intelInitBufferObjectFuncs(functions);
 242    intel_init_syncobj_functions(functions);
 243    brw_init_object_purgeable_functions(functions);
 244
 245    brwInitFragProgFuncs( functions );
 246    brw_init_common_queryobj_functions(functions);
 247    if (brw->gen >= 6)
 248       gen6_init_queryobj_functions(functions);
 249    else
 250       gen4_init_queryobj_functions(functions);
 251
 252    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 253
 254    if (brw->gen >= 7) {
 255       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 256       functions->EndTransformFeedback = gen7_end_transform_feedback;
 257    } else {
 258       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 259       functions->EndTransformFeedback = brw_end_transform_feedback;
 260    }
 261
 262    if (brw->gen >= 6)
 263       functions->GetSamplePosition = gen6_get_sample_position;
 264 }
 265
 266 static void
 267 brw_initialize_context_constants(struct brw_context *brw)
 268 {
 269    struct gl_context *ctx = &brw->ctx;
 270
 271    ctx->Const.QueryCounterBits.Timestamp = 36;
 272
 273    ctx->Const.StripTextureBorder = true;
 274
 275    ctx->Const.MaxDualSourceDrawBuffers = 1;
 276    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 277    ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
 278    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 279    ctx->Const.MaxTextureUnits =
 280       MIN2(ctx->Const.MaxTextureCoordUnits,
 281            ctx->Const.FragmentProgram.MaxTextureImageUnits);
 282    ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
 283    ctx->Const.MaxCombinedTextureImageUnits =
 284       ctx->Const.VertexProgram.MaxTextureImageUnits +
 285       ctx->Const.FragmentProgram.MaxTextureImageUnits;
 286
 287    ctx->Const.MaxTextureLevels = 14; /* 8192 */
 288    if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
 289       ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
 290    ctx->Const.Max3DTextureLevels = 9;
 291    ctx->Const.MaxCubeTextureLevels = 12;
 292
 293    if (brw->gen >= 7)
 294       ctx->Const.MaxArrayTextureLayers = 2048;
 295    else
 296       ctx->Const.MaxArrayTextureLayers = 512;
 297
 298    ctx->Const.MaxTextureRectSize = 1 << 12;
 299
 300    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 301
 302    ctx->Const.MaxRenderbufferSize = 8192;
 303
 304    /* Hardware only supports a limited number of transform feedback buffers.
 305     * So we need to override the Mesa default (which is based only on software
 306     * limits).
 307     */
 308    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 309
 310    /* On Gen6, in the worst case, we use up one binding table entry per
 311     * transform feedback component (see comments above the definition of
 312     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
 313     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
 314     * BRW_MAX_SOL_BINDINGS.
 315     *
 316     * In "separate components" mode, we need to divide this value by
 317     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
 318     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
 319     */
 320    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
 321    ctx->Const.MaxTransformFeedbackSeparateComponents =
 322       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 323
 324    if (brw->gen == 6) {
 325       ctx->Const.MaxSamples = 4;
 326       ctx->Const.MaxColorTextureSamples = 4;
 327       ctx->Const.MaxDepthTextureSamples = 4;
 328       ctx->Const.MaxIntegerSamples = 4;
 329    } else if (brw->gen >= 7) {
 330       ctx->Const.MaxSamples = 8;
 331       ctx->Const.MaxColorTextureSamples = 8;
 332       ctx->Const.MaxDepthTextureSamples = 8;
 333       ctx->Const.MaxIntegerSamples = 8;
 334       ctx->Const.MaxProgramTextureGatherComponents = 4;
 335    }
 336
 337    ctx->Const.MinLineWidth = 1.0;
 338    ctx->Const.MinLineWidthAA = 1.0;
 339    ctx->Const.MaxLineWidth = 5.0;
 340    ctx->Const.MaxLineWidthAA = 5.0;
 341    ctx->Const.LineWidthGranularity = 0.5;
 342
 343    ctx->Const.MinPointSize = 1.0;
 344    ctx->Const.MinPointSizeAA = 1.0;
 345    ctx->Const.MaxPointSize = 255.0;
 346    ctx->Const.MaxPointSizeAA = 255.0;
 347    ctx->Const.PointSizeGranularity = 1.0;
 348
 349    if (brw->gen >= 5 || brw->is_g4x)
 350       ctx->Const.MaxClipPlanes = 8;
 351
 352    ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024;
 353    ctx->Const.VertexProgram.MaxAluInstructions = 0;
 354    ctx->Const.VertexProgram.MaxTexInstructions = 0;
 355    ctx->Const.VertexProgram.MaxTexIndirections = 0;
 356    ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
 357    ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
 358    ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
 359    ctx->Const.VertexProgram.MaxNativeAttribs = 16;
 360    ctx->Const.VertexProgram.MaxNativeTemps = 256;
 361    ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
 362    ctx->Const.VertexProgram.MaxNativeParameters = 1024;
 363    ctx->Const.VertexProgram.MaxEnvParams =
 364       MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
 365            ctx->Const.VertexProgram.MaxEnvParams);
 366
 367    ctx->Const.FragmentProgram.MaxNativeInstructions = 1024;
 368    ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024;
 369    ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024;
 370    ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024;
 371    ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
 372    ctx->Const.FragmentProgram.MaxNativeTemps = 256;
 373    ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
 374    ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
 375    ctx->Const.FragmentProgram.MaxEnvParams =
 376       MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
 377            ctx->Const.FragmentProgram.MaxEnvParams);
 378
 379    /* Fragment shaders use real, 32-bit twos-complement integers for all
 380     * integer types.
 381     */
 382    ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
 383    ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
 384    ctx->Const.FragmentProgram.LowInt.Precision = 0;
 385    ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt;
 386    ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt;
 387
 388    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
 389     * but we're not sure how it's actually done for vertex order,
 390     * that affect provoking vertex decision. Always use last vertex
 391     * convention for quad primitive which works as expected for now.
 392     */
 393    if (brw->gen >= 6)
 394       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 395
 396    ctx->Const.NativeIntegers = true;
 397    ctx->Const.UniformBooleanTrue = 1;
 398    ctx->Const.UniformBufferOffsetAlignment = 16;
 399
 400    if (brw->gen >= 6) {
 401       ctx->Const.MaxVarying = 32;
 402       ctx->Const.VertexProgram.MaxOutputComponents = 128;
 403       ctx->Const.GeometryProgram.MaxInputComponents = 128;
 404       ctx->Const.GeometryProgram.MaxOutputComponents = 128;
 405       ctx->Const.FragmentProgram.MaxInputComponents = 128;
 406    }
 407
 408    /* We want the GLSL compiler to emit code that uses condition codes */
 409    for (int i = 0; i < MESA_SHADER_TYPES; i++) {
 410       ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
 411       ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
 412       ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
 413       ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
 414       ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
 415       ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
 416
 417       ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
 418          (i == MESA_SHADER_FRAGMENT);
 419       ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
 420          (i == MESA_SHADER_FRAGMENT);
 421       ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
 422    }
 423
 424    ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
 425 }
 426
 427 /**
 428  * Process driconf (drirc) options, setting appropriate context flags.
 429  *
 430  * intelInitExtensions still pokes at optionCache directly, in order to
 431  * avoid advertising various extensions.  No flags are set, so it makes
 432  * sense to continue doing that there.
 433  */
 434 static void
 435 brw_process_driconf_options(struct brw_context *brw)
 436 {
 437    struct gl_context *ctx = &brw->ctx;
 438
 439    driOptionCache *options = &brw->optionCache;
 440    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 441                        brw->driContext->driScreenPriv->myNum, "i965");
 442
 443    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 444    switch (bo_reuse_mode) {
 445    case DRI_CONF_BO_REUSE_DISABLED:
 446       break;
 447    case DRI_CONF_BO_REUSE_ALL:
 448       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 449       break;
 450    }
 451
 452    if (!driQueryOptionb(options, "hiz")) {
 453        brw->has_hiz = false;
 454        /* On gen6, you can only do separate stencil with HIZ. */
 455        if (brw->gen == 6)
 456           brw->has_separate_stencil = false;
 457    }
 458
 459    if (driQueryOptionb(options, "always_flush_batch")) {
 460       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 461       brw->always_flush_batch = true;
 462    }
 463
 464    if (driQueryOptionb(options, "always_flush_cache")) {
 465       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 466       brw->always_flush_cache = true;
 467    }
 468
 469    if (driQueryOptionb(options, "disable_throttling")) {
 470       fprintf(stderr, "disabling flush throttling\n");
 471       brw->disable_throttling = true;
 472    }
 473
 474    brw->disable_derivative_optimization =
 475       driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
 476
 477    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 478
 479    ctx->Const.ForceGLSLExtensionsWarn =
 480       driQueryOptionb(options, "force_glsl_extensions_warn");
 481
 482    ctx->Const.DisableGLSLLineContinuations =
 483       driQueryOptionb(options, "disable_glsl_line_continuations");
 484 }
 485
 486 bool
 487 brwCreateContext(gl_api api,
 488                  const struct gl_config *mesaVis,
 489                  __DRIcontext *driContextPriv,
 490                  unsigned major_version,
 491                  unsigned minor_version,
 492                  uint32_t flags,
 493                  unsigned *dri_ctx_error,
 494                  void *sharedContextPrivate)
 495 {
 496    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 497    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 498    struct intel_screen *screen = sPriv->driverPrivate;
 499    const struct brw_device_info *devinfo = screen->devinfo;
 500    struct dd_function_table functions;
 501    struct gl_config visual;
 502
 503    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 504    if (!brw) {
 505       printf("%s: failed to alloc context\n", __FUNCTION__);
 506       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 507       return false;
 508    }
 509
 510    driContextPriv->driverPrivate = brw;
 511    brw->driContext = driContextPriv;
 512    brw->intelScreen = screen;
 513    brw->bufmgr = screen->bufmgr;
 514
 515    brw->gen = devinfo->gen;
 516    brw->gt = devinfo->gt;
 517    brw->is_g4x = devinfo->is_g4x;
 518    brw->is_baytrail = devinfo->is_baytrail;
 519    brw->is_haswell = devinfo->is_haswell;
 520    brw->has_llc = devinfo->has_llc;
 521    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 522    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 523    brw->has_pln = devinfo->has_pln;
 524    brw->has_compr4 = devinfo->has_compr4;
 525    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 526    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 527    brw->needs_unlit_centroid_workaround =
 528       devinfo->needs_unlit_centroid_workaround;
 529
 530    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 531    brw->has_swizzling = screen->hw_has_swizzling;
 532
 533    brwInitVtbl( brw );
 534
 535    brw_init_driver_functions(brw, &functions);
 536
 537    struct gl_context *ctx = &brw->ctx;
 538
 539    if (mesaVis == NULL) {
 540       memset(&visual, 0, sizeof visual);
 541       mesaVis = &visual;
 542    }
 543
 544    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 545       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 546       printf("%s: failed to init mesa context\n", __FUNCTION__);
 547       intelDestroyContext(driContextPriv);
 548       return false;
 549    }
 550
 551    /* Initialize the software rasterizer and helper modules.
 552     *
 553     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 554     * software fallbacks (which we have to support on legacy GL to do weird
 555     * glDrawPixels(), glBitmap(), and other functions).
 556     */
 557    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 558       _swrast_CreateContext(ctx);
 559    }
 560
 561    _vbo_CreateContext(ctx);
 562    if (ctx->swrast_context) {
 563       _tnl_CreateContext(ctx);
 564       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 565       _swsetup_CreateContext(ctx);
 566
 567       /* Configure swrast to match hardware characteristics: */
 568       _swrast_allow_pixel_fog(ctx, false);
 569       _swrast_allow_vertex_fog(ctx, true);
 570    }
 571
 572    _mesa_meta_init(ctx);
 573
 574    brw_process_driconf_options(brw);
 575    brw_process_intel_debug_variable(brw);
 576    brw_initialize_context_constants(brw);
 577
 578    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 579    _mesa_init_point(ctx);
 580
 581    intelInitExtensions(ctx);
 582
 583    intel_batchbuffer_init(brw);
 584
 585    intel_fbo_init(brw);
 586
 587    if (brw->gen >= 6) {
 588       /* Create a new hardware context.  Using a hardware context means that
 589        * our GPU state will be saved/restored on context switch, allowing us
 590        * to assume that the GPU is in the same state we left it in.
 591        *
 592        * This is required for transform feedback buffer offsets, query objects,
 593        * and also allows us to reduce how much state we have to emit.
 594        */
 595       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 596
 597       if (!brw->hw_ctx) {
 598          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 599          intelDestroyContext(driContextPriv);
 600          return false;
 601       }
 602    }
 603
 604    brw_init_surface_formats(brw);
 605
 606    if (brw->is_g4x || brw->gen >= 5) {
 607       brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
 608       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
 609   } else {
 610       brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
 611       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
 612    }
 613
 614    brw->max_vs_threads = devinfo->max_vs_threads;
 615    brw->max_gs_threads = devinfo->max_gs_threads;
 616    brw->max_wm_threads = devinfo->max_wm_threads;
 617    brw->urb.size = devinfo->urb.size;
 618    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 619    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 620    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 621
 622    /* Estimate the size of the mappable aperture into the GTT.  There's an
 623     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 624     * It turns out it's basically always 256MB, though some ancient hardware
 625     * was smaller.
 626     */
 627    uint32_t gtt_size = 256 * 1024 * 1024;
 628
 629    /* We don't want to map two objects such that a memcpy between them would
 630     * just fault one mapping in and then the other over and over forever.  So
 631     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 632     * taken up by things like the framebuffer and the ringbuffer and such, so
 633     * be more conservative.
 634     */
 635    brw->max_gtt_map_object_size = gtt_size / 4;
 636
 637    if (brw->gen == 6)
 638       brw->urb.gen6_gs_previously_active = false;
 639
 640    brw->prim_restart.in_progress = false;
 641    brw->prim_restart.enable_cut_index = false;
 642
 643    brw_init_state( brw );
 644
 645    if (brw->gen < 6) {
 646       brw->curbe.last_buf = calloc(1, 4096);
 647       brw->curbe.next_buf = calloc(1, 4096);
 648    }
 649
 650    ctx->VertexProgram._MaintainTnlProgram = true;
 651    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 652
 653    brw_draw_init( brw );
 654
 655    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 656       /* Turn on some extra GL_ARB_debug_output generation. */
 657       brw->perf_debug = true;
 658    }
 659
 660    brw_fs_alloc_reg_sets(brw);
 661    brw_vec4_alloc_reg_set(brw);
 662
 663    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 664       brw_init_shader_time(brw);
 665
 666    _mesa_compute_version(ctx);
 667
 668    _mesa_initialize_dispatch_tables(ctx);
 669    _mesa_initialize_vbo_vtxfmt(ctx);
 670
 671    if (ctx->Extensions.AMD_performance_monitor) {
 672       brw_init_performance_monitors(brw);
 673    }
 674
 675    return true;
 676 }
 677
 678 void
 679 intelDestroyContext(__DRIcontext * driContextPriv)
 680 {
 681    struct brw_context *brw =
 682       (struct brw_context *) driContextPriv->driverPrivate;
 683    struct gl_context *ctx = &brw->ctx;
 684
 685    assert(brw); /* should never be null */
 686    if (!brw)
 687       return;
 688
 689    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 690    if (INTEL_DEBUG & DEBUG_AUB) {
 691       intel_batchbuffer_flush(brw);
 692       aub_dump_bmp(&brw->ctx);
 693    }
 694
 695    _mesa_meta_free(&brw->ctx);
 696
 697    brw->vtbl.destroy(brw);
 698
 699    if (ctx->swrast_context) {
 700       _swsetup_DestroyContext(&brw->ctx);
 701       _tnl_DestroyContext(&brw->ctx);
 702    }
 703    _vbo_DestroyContext(&brw->ctx);
 704
 705    if (ctx->swrast_context)
 706       _swrast_DestroyContext(&brw->ctx);
 707
 708    intel_batchbuffer_free(brw);
 709
 710    drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
 711    brw->first_post_swapbuffers_batch = NULL;
 712
 713    driDestroyOptionCache(&brw->optionCache);
 714
 715    /* free the Mesa context */
 716    _mesa_free_context_data(&brw->ctx);
 717
 718    ralloc_free(brw);
 719    driContextPriv->driverPrivate = NULL;
 720 }
 721
 722 GLboolean
 723 intelUnbindContext(__DRIcontext * driContextPriv)
 724 {
 725    /* Unset current context and dispath table */
 726    _mesa_make_current(NULL, NULL, NULL);
 727
 728    return true;
 729 }
 730
 731 /**
 732  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 733  * on window system framebuffers.
 734  *
 735  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 736  * your renderbuffer can do sRGB encode, and you can flip a switch that does
 737  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 738  * for a visual where you're guaranteed to be capable, but it turns out that
 739  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 740  * incapable ones, becuase there's no difference between the two in resources
 741  * used.  Applications thus get built that accidentally rely on the default
 742  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 743  * great...
 744  *
 745  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 746  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 747  * So they removed the enable knob and made it "if the renderbuffer is sRGB
 748  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 749  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 750  * and get no sRGB encode (assuming that both kinds of visual are available).
 751  * Thus our choice to support sRGB by default on our visuals for desktop would
 752  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 753  *
 754  * Unfortunately, renderbuffer setup happens before a context is created.  So
 755  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 756  * context (without an sRGB visual, though we don't have sRGB visuals exposed
 757  * yet), we go turn that back off before anyone finds out.
 758  */
 759 static void
 760 intel_gles3_srgb_workaround(struct brw_context *brw,
 761                             struct gl_framebuffer *fb)
 762 {
 763    struct gl_context *ctx = &brw->ctx;
 764
 765    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
 766       return;
 767
 768    /* Some day when we support the sRGB capable bit on visuals available for
 769     * GLES, we'll need to respect that and not disable things here.
 770     */
 771    fb->Visual.sRGBCapable = false;
 772    for (int i = 0; i < BUFFER_COUNT; i++) {
 773       if (fb->Attachment[i].Renderbuffer &&
 774           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_SARGB8) {
 775          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_ARGB8888;
 776       }
 777    }
 778 }
 779
 780 GLboolean
 781 intelMakeCurrent(__DRIcontext * driContextPriv,
 782                  __DRIdrawable * driDrawPriv,
 783                  __DRIdrawable * driReadPriv)
 784 {
 785    struct brw_context *brw;
 786    GET_CURRENT_CONTEXT(curCtx);
 787
 788    if (driContextPriv)
 789       brw = (struct brw_context *) driContextPriv->driverPrivate;
 790    else
 791       brw = NULL;
 792
 793    /* According to the glXMakeCurrent() man page: "Pending commands to
 794     * the previous context, if any, are flushed before it is released."
 795     * But only flush if we're actually changing contexts.
 796     */
 797    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
 798       _mesa_flush(curCtx);
 799    }
 800
 801    if (driContextPriv) {
 802       struct gl_context *ctx = &brw->ctx;
 803       struct gl_framebuffer *fb, *readFb;
 804
 805       if (driDrawPriv == NULL && driReadPriv == NULL) {
 806          fb = _mesa_get_incomplete_framebuffer();
 807          readFb = _mesa_get_incomplete_framebuffer();
 808       } else {
 809          fb = driDrawPriv->driverPrivate;
 810          readFb = driReadPriv->driverPrivate;
 811          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
 812          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
 813       }
 814
 815       /* The sRGB workaround changes the renderbuffer's format. We must change
 816        * the format before the renderbuffer's miptree get's allocated, otherwise
 817        * the formats of the renderbuffer and its miptree will differ.
 818        */
 819       intel_gles3_srgb_workaround(brw, fb);
 820       intel_gles3_srgb_workaround(brw, readFb);
 821
 822       intel_prepare_render(brw);
 823       _mesa_make_current(ctx, fb, readFb);
 824    } else {
 825       _mesa_make_current(NULL, NULL, NULL);
 826    }
 827
 828    return true;
 829 }
 830
 831 void
 832 intel_resolve_for_dri2_flush(struct brw_context *brw,
 833                              __DRIdrawable *drawable)
 834 {
 835    if (brw->gen < 6) {
 836       /* MSAA and fast color clear are not supported, so don't waste time
 837        * checking whether a resolve is needed.
 838        */
 839       return;
 840    }
 841
 842    struct gl_framebuffer *fb = drawable->driverPrivate;
 843    struct intel_renderbuffer *rb;
 844
 845    /* Usually, only the back buffer will need to be downsampled. However,
 846     * the front buffer will also need it if the user has rendered into it.
 847     */
 848    static const gl_buffer_index buffers[2] = {
 849          BUFFER_BACK_LEFT,
 850          BUFFER_FRONT_LEFT,
 851    };
 852
 853    for (int i = 0; i < 2; ++i) {
 854       rb = intel_get_renderbuffer(fb, buffers[i]);
 855       if (rb == NULL || rb->mt == NULL)
 856          continue;
 857       if (rb->mt->num_samples <= 1)
 858          intel_miptree_resolve_color(brw, rb->mt);
 859       else
 860          intel_miptree_downsample(brw, rb->mt);
 861    }
 862 }
 863
 864 static unsigned
 865 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
 866 {
 867    return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
 868 }
 869
 870 static void
 871 intel_query_dri2_buffers(struct brw_context *brw,
 872                          __DRIdrawable *drawable,
 873                          __DRIbuffer **buffers,
 874                          int *count);
 875
/* Forward declaration; the definition appears later in this file.  It is
 * called by intel_update_renderbuffers() once for each buffer returned by
 * the DRI2 query.
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);
 882
 883 void
 884 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
 885 {
 886    struct gl_framebuffer *fb = drawable->driverPrivate;
 887    struct intel_renderbuffer *rb;
 888    struct brw_context *brw = context->driverPrivate;
 889    __DRIbuffer *buffers = NULL;
 890    int i, count;
 891    const char *region_name;
 892
 893    /* Set this up front, so that in case our buffers get invalidated
 894     * while we're getting new buffers, we don't clobber the stamp and
 895     * thus ignore the invalidate. */
 896    drawable->lastStamp = drawable->dri2.stamp;
 897
 898    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
 899       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
 900
 901    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
 902
 903    if (buffers == NULL)
 904       return;
 905
 906    for (i = 0; i < count; i++) {
 907        switch (buffers[i].attachment) {
 908        case __DRI_BUFFER_FRONT_LEFT:
 909            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
 910            region_name = "dri2 front buffer";
 911            break;
 912
 913        case __DRI_BUFFER_FAKE_FRONT_LEFT:
 914            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
 915            region_name = "dri2 fake front buffer";
 916            break;
 917
 918        case __DRI_BUFFER_BACK_LEFT:
 919            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
 920            region_name = "dri2 back buffer";
 921            break;
 922
 923        case __DRI_BUFFER_DEPTH:
 924        case __DRI_BUFFER_HIZ:
 925        case __DRI_BUFFER_DEPTH_STENCIL:
 926        case __DRI_BUFFER_STENCIL:
 927        case __DRI_BUFFER_ACCUM:
 928        default:
 929            fprintf(stderr,
 930                    "unhandled buffer attach event, attachment type %d\n",
 931                    buffers[i].attachment);
 932            return;
 933        }
 934
 935        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
 936    }
 937
 938    driUpdateFramebufferSize(&brw->ctx, drawable);
 939 }
 940
 941 /**
 942  * intel_prepare_render should be called anywhere that curent read/drawbuffer
 943  * state is required.
 944  */
 945 void
 946 intel_prepare_render(struct brw_context *brw)
 947 {
 948    __DRIcontext *driContext = brw->driContext;
 949    __DRIdrawable *drawable;
 950
 951    drawable = driContext->driDrawablePriv;
 952    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
 953       if (drawable->lastStamp != drawable->dri2.stamp)
 954          intel_update_renderbuffers(driContext, drawable);
 955       driContext->dri2.draw_stamp = drawable->dri2.stamp;
 956    }
 957
 958    drawable = driContext->driReadablePriv;
 959    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
 960       if (drawable->lastStamp != drawable->dri2.stamp)
 961          intel_update_renderbuffers(driContext, drawable);
 962       driContext->dri2.read_stamp = drawable->dri2.stamp;
 963    }
 964
 965    /* If we're currently rendering to the front buffer, the rendering
 966     * that will happen next will probably dirty the front buffer.  So
 967     * mark it as dirty here.
 968     */
 969    if (brw->is_front_buffer_rendering)
 970       brw->front_buffer_dirty = true;
 971
 972    /* Wait for the swapbuffers before the one we just emitted, so we
 973     * don't get too many swaps outstanding for apps that are GPU-heavy
 974     * but not CPU-heavy.
 975     *
 976     * We're using intelDRI2Flush (called from the loader before
 977     * swapbuffer) and glFlush (for front buffer rendering) as the
 978     * indicator that a frame is done and then throttle when we get
 979     * here as we prepare to render the next frame.  At this point for
 980     * round trips for swap/copy and getting new buffers are done and
 981     * we'll spend less time waiting on the GPU.
 982     *
 983     * Unfortunately, we don't have a handle to the batch containing
 984     * the swap, and getting our hands on that doesn't seem worth it,
 985     * so we just us the first batch we emitted after the last swap.
 986     */
 987    if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
 988       if (!brw->disable_throttling)
 989          drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
 990       drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
 991       brw->first_post_swapbuffers_batch = NULL;
 992       brw->need_throttle = false;
 993    }
 994 }
 995
 996 /**
 997  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 998  *
 999  * To determine which DRI buffers to request, examine the renderbuffers
1000  * attached to the drawable's framebuffer. Then request the buffers with
1001  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1002  *
1003  * This is called from intel_update_renderbuffers().
1004  *
1005  * \param drawable      Drawable whose buffers are queried.
1006  * \param buffers       [out] List of buffers returned by DRI2 query.
1007  * \param buffer_count  [out] Number of buffers returned.
1008  *
1009  * \see intel_update_renderbuffers()
1010  * \see DRI2GetBuffers()
1011  * \see DRI2GetBuffersWithFormat()
1012  */
1013 static void
1014 intel_query_dri2_buffers(struct brw_context *brw,
1015                          __DRIdrawable *drawable,
1016                          __DRIbuffer **buffers,
1017                          int *buffer_count)
1018 {
1019    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1020    struct gl_framebuffer *fb = drawable->driverPrivate;
1021    int i = 0;
1022    unsigned attachments[8];
1023
1024    struct intel_renderbuffer *front_rb;
1025    struct intel_renderbuffer *back_rb;
1026
1027    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1028    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1029
1030    memset(attachments, 0, sizeof(attachments));
1031    if ((brw->is_front_buffer_rendering ||
1032         brw->is_front_buffer_reading ||
1033         !back_rb) && front_rb) {
1034       /* If a fake front buffer is in use, then querying for
1035        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1036        * the real front buffer to the fake front buffer.  So before doing the
1037        * query, we need to make sure all the pending drawing has landed in the
1038        * real front buffer.
1039        */
1040       intel_batchbuffer_flush(brw);
1041       intel_flush_front(&brw->ctx);
1042
1043       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1044       attachments[i++] = intel_bits_per_pixel(front_rb);
1045    } else if (front_rb && brw->front_buffer_dirty) {
1046       /* We have pending front buffer rendering, but we aren't querying for a
1047        * front buffer.  If the front buffer we have is a fake front buffer,
1048        * the X server is going to throw it away when it processes the query.
1049        * So before doing the query, make sure all the pending drawing has
1050        * landed in the real front buffer.
1051        */
1052       intel_batchbuffer_flush(brw);
1053       intel_flush_front(&brw->ctx);
1054    }
1055
1056    if (back_rb) {
1057       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1058       attachments[i++] = intel_bits_per_pixel(back_rb);
1059    }
1060
1061    assert(i <= ARRAY_SIZE(attachments));
1062
1063    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1064                                                         &drawable->w,
1065                                                         &drawable->h,
1066                                                         attachments, i / 2,
1067                                                         buffer_count,
1068                                                         drawable->loaderPrivate);
1069 }
1070
1071 /**
1072  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1073  *
1074  * This is called from intel_update_renderbuffers().
1075  *
1076  * \par Note:
1077  *    DRI buffers whose attachment point is DRI2BufferStencil or
1078  *    DRI2BufferDepthStencil are handled as special cases.
1079  *
1080  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1081  *        that is passed to intel_region_alloc_for_handle().
1082  *
1083  * \see intel_update_renderbuffers()
1084  * \see intel_region_alloc_for_handle()
1085  */
1086 static void
1087 intel_process_dri2_buffer(struct brw_context *brw,
1088                           __DRIdrawable *drawable,
1089                           __DRIbuffer *buffer,
1090                           struct intel_renderbuffer *rb,
1091                           const char *buffer_name)
1092 {
1093    struct intel_region *region = NULL;
1094
1095    if (!rb)
1096       return;
1097
1098    unsigned num_samples = rb->Base.Base.NumSamples;
1099
1100    /* We try to avoid closing and reopening the same BO name, because the first
1101     * use of a mapping of the buffer involves a bunch of page faulting which is
1102     * moderately expensive.
1103     */
1104    if (num_samples == 0) {
1105        if (rb->mt &&
1106            rb->mt->region &&
1107            rb->mt->region->name == buffer->name)
1108           return;
1109    } else {
1110        if (rb->mt &&
1111            rb->mt->singlesample_mt &&
1112            rb->mt->singlesample_mt->region &&
1113            rb->mt->singlesample_mt->region->name == buffer->name)
1114           return;
1115    }
1116
1117    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1118       fprintf(stderr,
1119               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1120               buffer->name, buffer->attachment,
1121               buffer->cpp, buffer->pitch);
1122    }
1123
1124    intel_miptree_release(&rb->mt);
1125    region = intel_region_alloc_for_handle(brw->intelScreen,
1126                                           buffer->cpp,
1127                                           drawable->w,
1128                                           drawable->h,
1129                                           buffer->pitch,
1130                                           buffer->name,
1131                                           buffer_name);
1132    if (!region)
1133       return;
1134
1135    rb->mt = intel_miptree_create_for_dri2_buffer(brw,
1136                                                  buffer->attachment,
1137                                                  intel_rb_format(rb),
1138                                                  num_samples,
1139                                                  region);
1140    intel_region_release(&region);
1141 }