i965: Use brw_bo_wait() for brw_bo_wait_rendering()
[mesa.git] src/mesa/drivers/dri/i965/brw_context.c
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "compiler/nir/nir.h"
#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"
#include "main/stencil.h"
#include "main/state.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"
#include "isl/isl.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

const char *const brw_vendor_string = "Intel Open Source Technology Center";

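/* Braswell exposes its marketing model number through the fused EU count:
 * 16 EUs are branded "405", 12 EUs "400", and any other fusing gets a blank
 * model (the renderer string code below substitutes the result over a "XXX"
 * placeholder in the chipset name).
 */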
static const char *
get_bsw_model(const struct intel_screen *screen)
{
   switch (screen->eu_total) {
   case 16:
      return "405";
   case 12:
      return "400";
   default:
      return "   ";
   }
}

const char *
brw_get_renderer_string(const struct intel_screen *screen)
{
   const char *chipset;
   static char buffer[128];
   char *bsw = NULL;

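   /* Expand the shared PCI ID table into a switch over device IDs (X-macro
    * pattern); each CHIPSET() entry supplies the marketing name.
    */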
   switch (screen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   /* Braswell branding is funny, so we have to fix it up here */
   if (screen->deviceID == 0x22B1) {
      bsw = strdup(chipset);
      char *needle = strstr(bsw, "XXX");
      if (needle) {
         memcpy(needle, get_bsw_model(screen), 3);
         chipset = bsw;
      }
   }

   (void) driGetRendererString(buffer, chipset, 0);
   free(bsw);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->screen);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples
    */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->screen,
                                 fb->DefaultGeometry.NumSamples);
}

static void
intel_update_state(struct gl_context * ctx)
{
   GLuint new_state = ctx->NewState;
   struct brw_context *brw = brw_context(ctx);

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
      _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);

   if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
      brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
      brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
      brw->stencil_write_enabled =
         _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
   }

   if (new_state & _NEW_POLYGON)
      brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);

   intel_prepare_render(brw);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

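/* Select the loader's flushFrontBuffer hook: prefer the image loader when
 * the screen has one, otherwise fall back to the classic DRI2 loader.
 */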
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const dri_screen = brw->screen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(dri_screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

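   /* glFinish must not return until all previously submitted commands have
    * completed, so block on the most recently submitted batch buffer.
    */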
   if (brw->batch.last_bo)
      brw_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   brw_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 8 || brw->is_haswell)
      hsw_init_queryobj_functions(functions);
   else if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   brw_init_conditional_render_functions(functions);

   functions->QueryInternalFormat = brw_query_internal_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
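   /* Pick transform feedback hooks by capability: screens that can do
    * MI_MATH and MI_LOAD_REGISTER_REG get the HSW paths, Gen7 gets its own
    * paths plus a software-assisted vertex count, and older parts use the
    * generic BRW implementations.
    */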
   if (can_do_mi_math_and_lrr(brw->screen)) {
      functions->BeginTransformFeedback = hsw_begin_transform_feedback;
      functions->EndTransformFeedback = hsw_end_transform_feedback;
      functions->PauseTransformFeedback = hsw_pause_transform_feedback;
      functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
   } else if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
      functions->PauseTransformFeedback = brw_pause_transform_feedback;
      functions->ResumeTransformFeedback = brw_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->screen->compiler;

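   /* Which stages exist on this hardware/API combination. Compute is only
    * exposed when the per-API minimum workgroup size is met: 1024
    * invocations for desktop GL (ARB_compute_shader) and 128 for ES 3.1,
    * unless the extension is force-enabled.
    */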
   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         ((ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGL_CORE) &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   /* The timestamp register we can read for glGetTimestamp() is
    * sometimes only 32 bits, before scaling to nanoseconds (depending
    * on kernel).
    *
    * Once scaled to nanoseconds the timestamp would roll over at a
    * non-power-of-two, so an application couldn't use
    * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
    * report 36 bits and truncate at that (rolling over 5 times as
    * often as the HW counter), and when the 32-bit counter rolls
    * over, it happens to also be at a rollover in the reported value
    * from near (1<<36) to 0.
    *
    * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
    * rolls over every ~69 seconds.
    */
   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   if (brw->gen >= 7) {
      ctx->Const.MaxRenderbufferSize = 16384;
      ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
      ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
   } else {
      ctx->Const.MaxRenderbufferSize = 8192;
      ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
      ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   }
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = brw->gen >= 7 ? 16384 : 8192;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.MaxTextureLodBias = 15.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7) {
      ctx->Const.MaxProgramTextureGatherComponents = 4;
      ctx->Const.MinProgramTextureGatherOffset = -32;
      ctx->Const.MaxProgramTextureGatherOffset = 31;
   } else if (brw->gen == 6) {
      ctx->Const.MaxProgramTextureGatherComponents = 1;
      ctx->Const.MinProgramTextureGatherOffset = -8;
      ctx->Const.MaxProgramTextureGatherOffset = 7;
   }

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
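      /* Each bound UBO contributes MaxUniformBlockSize / 4 = 16384 float
       * components (65536 bytes at 4 bytes per component) on top of the
       * default uniform block.
       */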
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
      !can_do_mi_math_and_lrr(brw->screen);

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
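      /* msaa_modes is assumed to be sorted in decreasing sample count (the
       * "break" below relies on it), so the first mode that fits is the
       * largest.
       */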
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.GLSLTessLevelsAsInputs = true;
   ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8;
   ctx->Const.LowerTESPatchVerticesIn = true;
   ctx->Const.PrimitiveRestartForPatches = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders vertices, which affects
    * the provoking vertex decision. Always use the last-vertex convention
    * for quad primitives, which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->screen->compiler->glsl_compiler_options[i];
   }

   if (brw->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array, OES_viewport_array */
   if (brw->gen >= 6) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;

   /* OES_primitive_bounding_box */
   ctx->Const.NoPrimitiveBoundingBoxOutput = true;
}

static void
brw_initialize_cs_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct intel_screen *screen = brw->screen;
   struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* FINISHME: Do this for all platforms that the kernel supports */
   if (brw->is_cherryview &&
       screen->subslice_total > 0 && screen->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * 7 threads per EU */
      uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;

      /* Fuse configurations may give more threads than expected, never less. */
      if (max_cs_threads > devinfo->max_cs_threads)
         devinfo->max_cs_threads = max_cs_threads;
   }

   /* Maximum number of scalar compute shader invocations that can be run in
    * parallel in the same subslice assuming SIMD32 dispatch.
    *
    * We don't advertise more than 64 threads, because we are limited to 64 by
    * our usage of thread_width_max in the gpgpu walker command. This only
    * currently impacts Haswell, which otherwise might be able to advertise 70
    * threads. With SIMD32 and 64 threads, Haswell still provides twice the
    * number of invocations required by ARB_compute_shader.
    */
   const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
   const uint32_t max_invocations = 32 * max_threads;
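   /* With the 64-thread cap this is 32 * 64 = 2048 invocations, comfortably
    * above the 1024 that ARB_compute_shader requires.
    */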
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions. No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->screen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      brw_bufmgr_enable_reuse(brw->bufmgr);
      break;
   }

   if (INTEL_DEBUG & DEBUG_NO_HIZ) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   if (driQueryOptionb(&brw->optionCache, "precise_trig"))
      brw->screen->compiler->precise_trig = true;

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.ForceGLSLVersion =
      driQueryOptioni(options, "force_glsl_version");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
      driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");

   ctx->Const.AllowHigherCompatVersion =
      driQueryOptionb(options, "allow_higher_compat_version");

   ctx->Const.ForceGLSLAbsSqrt =
      driQueryOptionb(options, "force_glsl_abs_sqrt");

   ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->screen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton || devinfo->is_geminilake;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   isl_device_init(&brw->isl_dev, devinfo, screen->hw_has_swizzling);

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
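   /* Hook up generation-specific surface state setup and depth/stencil/HiZ
    * emission vtables.
    */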
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state. It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(&brw->batch, brw->bufmgr, brw->has_llc);

   if (brw->gen >= 6) {
      /* Create a new hardware context. Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = brw_create_hw_context(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Failed to create hardware context.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw_blorp_init(brw);

   brw->urb.size = devinfo->urb.size;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->clip.viewport_count = 1;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
      ctx->Const.RobustAccess = GL_TRUE;
   }

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.INTEL_performance_query)
      brw_init_performance_queries(brw);

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   _mesa_meta_free(&brw->ctx);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   if (brw->gen >= 6)
      blorp_finish(&brw->blorp);

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   brw_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      brw_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->tcs.base.scratch_bo)
      brw_bo_unreference(brw->tcs.base.scratch_bo);
   if (brw->tes.base.scratch_bo)
      brw_bo_unreference(brw->tes.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      brw_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      brw_bo_unreference(brw->wm.base.scratch_bo);

   brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(&brw->batch);

   brw_bo_unreference(brw->throttle_batch[1]);
   brw_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it. You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used. Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode". Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created. So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
      if (rb)
         rb->Format = _mesa_get_srgb_format_linear(rb->Format);
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1) {
         assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
                rb->layer_count == 1);
         intel_miptree_prepare_access(brw, rb->mt, 0, 1, 0, 1, false, false);
      } else {
         intel_renderbuffer_downsample(brw, rb);
      }
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (int i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (dri_screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer. So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
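   /* attachments[] is filled as (attachment token, bits-per-pixel) pairs,
    * which is why the count handed to the loader below is i / 2.
    */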
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer. So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer. If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers =
      dri_screen->dri2.loader->getBuffersWithFormat(drawable,
                                                    &drawable->w,
                                                    &drawable->h,
                                                    attachments, i / 2,
                                                    buffer_count,
                                                    drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to brw_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct brw_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer(). If a bo already has a
       * name, then brw_bo_flink() is a low-cost getter. It does not
       * create a new name.
       */
      brw_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                    buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   brw_bo_unreference(bo);
}

/**
 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   mesa_format format;
   uint32_t buffer_mask = 0;
   int ret;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   ret = dri_screen->image.loader->getBuffers(drawable,
                                              driGLFormatToImageFormat(format),
                                              &drawable->dri2.stamp,
                                              drawable->loaderPrivate,
                                              buffer_mask,
                                              &images);
   if (!ret)
      return;

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }

   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}