driconfig: add a new engine name/version parameter
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "main/imports.h"
41 #include "main/macros.h"
42 #include "main/points.h"
43 #include "main/version.h"
44 #include "main/vtxfmt.h"
45 #include "main/texobj.h"
46 #include "main/framebuffer.h"
47 #include "main/stencil.h"
48 #include "main/state.h"
49 #include "main/spirv_extensions.h"
50
51 #include "vbo/vbo.h"
52
53 #include "drivers/common/driverfuncs.h"
54 #include "drivers/common/meta.h"
55 #include "utils.h"
56
57 #include "brw_context.h"
58 #include "brw_defines.h"
59 #include "brw_blorp.h"
60 #include "brw_draw.h"
61 #include "brw_state.h"
62
63 #include "intel_batchbuffer.h"
64 #include "intel_buffer_objects.h"
65 #include "intel_buffers.h"
66 #include "intel_fbo.h"
67 #include "intel_mipmap_tree.h"
68 #include "intel_pixel.h"
69 #include "intel_image.h"
70 #include "intel_tex.h"
71 #include "intel_tex_obj.h"
72
73 #include "swrast_setup/swrast_setup.h"
74 #include "tnl/tnl.h"
75 #include "tnl/t_pipeline.h"
76 #include "util/ralloc.h"
77 #include "util/debug.h"
78 #include "util/disk_cache.h"
79 #include "isl/isl.h"
80
81 #include "common/gen_defines.h"
82
83 #include "compiler/spirv/nir_spirv.h"
84 /***************************************
85 * Mesa's Driver Functions
86 ***************************************/
87
88 const char *const brw_vendor_string = "Intel Open Source Technology Center";
89
90 static const char *
91 get_bsw_model(const struct intel_screen *screen)
92 {
93 switch (screen->eu_total) {
94 case 16:
95 return "405";
96 case 12:
97 return "400";
98 default:
99       return "   "; /* three spaces: memcpy'd over the "XXX" placeholder below */
100 }
101 }
102
103 const char *
104 brw_get_renderer_string(const struct intel_screen *screen)
105 {
106 const char *chipset;
107 static char buffer[128];
108 char *bsw = NULL;
109
110 switch (screen->deviceID) {
111 #undef CHIPSET
112 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
113 #include "pci_ids/i965_pci_ids.h"
114 default:
115 chipset = "Unknown Intel Chipset";
116 break;
117 }
118
119 /* Braswell branding is funny, so we have to fix it up here */
120 if (screen->deviceID == 0x22B1) {
121 bsw = strdup(chipset);
122 char *needle = strstr(bsw, "XXX");
123 if (needle) {
124 memcpy(needle, get_bsw_model(screen), 3);
125 chipset = bsw;
126 }
127 }
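   /* Illustrative note (the exact table entry is an assumption, not quoted
    * from i965_pci_ids.h): the Braswell renderer strings are expected to
    * carry a three-character "XXX" placeholder, e.g. something like
    * "Intel(R) HD Graphics XXX (Braswell)". The memcpy above overwrites
    * exactly those three bytes, so a 16-EU part ends up advertised with
    * "405" from get_bsw_model() in place of "XXX".
    */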
128
129 (void) driGetRendererString(buffer, chipset, 0);
130 free(bsw);
131 return buffer;
132 }
133
134 static const GLubyte *
135 intel_get_string(struct gl_context * ctx, GLenum name)
136 {
137 const struct brw_context *const brw = brw_context(ctx);
138
139 switch (name) {
140 case GL_VENDOR:
141 return (GLubyte *) brw_vendor_string;
142
143 case GL_RENDERER:
144 return
145 (GLubyte *) brw_get_renderer_string(brw->screen);
146
147 default:
148 return NULL;
149 }
150 }
151
152 static void
153 brw_set_background_context(struct gl_context *ctx,
154 struct util_queue_monitoring *queue_info)
155 {
156 struct brw_context *brw = brw_context(ctx);
157 __DRIcontext *driContext = brw->driContext;
158 __DRIscreen *driScreen = driContext->driScreenPriv;
159 const __DRIbackgroundCallableExtension *backgroundCallable =
160 driScreen->dri2.backgroundCallable;
161
162 /* Note: Mesa will only call this function if we've called
163 * _mesa_enable_multithreading(). We only do that if the loader exposed
164 * the __DRI_BACKGROUND_CALLABLE extension. So we know that
165 * backgroundCallable is not NULL.
166 */
167 backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
168 }
169
170 static void
171 intel_viewport(struct gl_context *ctx)
172 {
173 struct brw_context *brw = brw_context(ctx);
174 __DRIcontext *driContext = brw->driContext;
175
176 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
177 if (driContext->driDrawablePriv)
178 dri2InvalidateDrawable(driContext->driDrawablePriv);
179 if (driContext->driReadablePriv)
180 dri2InvalidateDrawable(driContext->driReadablePriv);
181 }
182 }
183
184 static void
185 intel_update_framebuffer(struct gl_context *ctx,
186 struct gl_framebuffer *fb)
187 {
188 struct brw_context *brw = brw_context(ctx);
189
190 /* Quantize the derived default number of samples
191 */
192 fb->DefaultGeometry._NumSamples =
193 intel_quantize_num_samples(brw->screen,
194 fb->DefaultGeometry.NumSamples);
195 }
196
197 static void
198 intel_update_state(struct gl_context * ctx)
199 {
200 GLuint new_state = ctx->NewState;
201 struct brw_context *brw = brw_context(ctx);
202
203 if (ctx->swrast_context)
204 _swrast_InvalidateState(ctx, new_state);
205
206 brw->NewGLState |= new_state;
207
208 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
209 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
210
211 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
212 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
213 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
214 brw->stencil_write_enabled =
215 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
216 }
217
218 if (new_state & _NEW_POLYGON)
219 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
220
221 if (new_state & _NEW_BUFFERS) {
222 intel_update_framebuffer(ctx, ctx->DrawBuffer);
223 if (ctx->DrawBuffer != ctx->ReadBuffer)
224 intel_update_framebuffer(ctx, ctx->ReadBuffer);
225 }
226 }
227
228 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
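/* Usage sketch (inferred from the callers below, not from loader docs): the
 * macro picks whichever flushFrontBuffer hook the loader provided -- the
 * image loader's or the classic DRI2 loader's -- so callers first test
 * flushFront(screen) for non-NULL and then invoke it as
 *
 *    flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
 */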
229
230 static void
231 intel_flush_front(struct gl_context *ctx)
232 {
233 struct brw_context *brw = brw_context(ctx);
234 __DRIcontext *driContext = brw->driContext;
235 __DRIdrawable *driDrawable = driContext->driDrawablePriv;
236 __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
237
238 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
239 if (flushFront(dri_screen) && driDrawable &&
240 driDrawable->loaderPrivate) {
241
242 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
243 *
244 * This potentially resolves both front and back buffer. It
245 * is unnecessary to resolve the back, but harms nothing except
246 * performance. And no one cares about front-buffer render
247 * performance.
248 */
249 intel_resolve_for_dri2_flush(brw, driDrawable);
250 intel_batchbuffer_flush(brw);
251
252 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
253
254 /* We set the dirty bit in intel_prepare_render() if we're
255 * front buffer rendering once we get there.
256 */
257 brw->front_buffer_dirty = false;
258 }
259 }
260 }
261
262 static void
263 brw_display_shared_buffer(struct brw_context *brw)
264 {
265 __DRIcontext *dri_context = brw->driContext;
266 __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
267 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
268 int fence_fd = -1;
269
270 if (!brw->is_shared_buffer_bound)
271 return;
272
273 if (!brw->is_shared_buffer_dirty)
274 return;
275
276 if (brw->screen->has_exec_fence) {
277 /* This function is always called during a flush operation, so there is
278 * no need to flush again here. But we want to provide a fence_fd to the
279 * loader, and a redundant flush is the easiest way to acquire one.
280 */
281 if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
282 return;
283 }
284
285 dri_screen->mutableRenderBuffer.loader
286 ->displaySharedBuffer(dri_drawable, fence_fd,
287 dri_drawable->loaderPrivate);
288 brw->is_shared_buffer_dirty = false;
289 }
290
291 static void
292 intel_glFlush(struct gl_context *ctx)
293 {
294 struct brw_context *brw = brw_context(ctx);
295
296 intel_batchbuffer_flush(brw);
297 intel_flush_front(ctx);
298 brw_display_shared_buffer(brw);
299 brw->need_flush_throttle = true;
300 }
301
302 static void
303 intel_finish(struct gl_context * ctx)
304 {
305 struct brw_context *brw = brw_context(ctx);
306
307 intel_glFlush(ctx);
308
309 if (brw->batch.last_bo)
310 brw_bo_wait_rendering(brw->batch.last_bo);
311 }
312
313 static void
314 brw_init_driver_functions(struct brw_context *brw,
315 struct dd_function_table *functions)
316 {
317 const struct gen_device_info *devinfo = &brw->screen->devinfo;
318
319 _mesa_init_driver_functions(functions);
320
321 /* GLX uses DRI2 invalidate events to handle window resizing.
322 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
323 * which doesn't provide a mechanism for snooping the event queues.
324 *
325 * So EGL still relies on viewport hacks to handle window resizing.
326 * This should go away with DRI3000.
327 */
328 if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
329 functions->Viewport = intel_viewport;
330
331 functions->Flush = intel_glFlush;
332 functions->Finish = intel_finish;
333 functions->GetString = intel_get_string;
334 functions->UpdateState = intel_update_state;
335
336 brw_init_draw_functions(functions);
337 intelInitTextureFuncs(functions);
338 intelInitTextureImageFuncs(functions);
339 intelInitTextureCopyImageFuncs(functions);
340 intelInitCopyImageFuncs(functions);
341 intelInitClearFuncs(functions);
342 intelInitBufferFuncs(functions);
343 intelInitPixelFuncs(functions);
344 intelInitBufferObjectFuncs(functions);
345 brw_init_syncobj_functions(functions);
346 brw_init_object_purgeable_functions(functions);
347
348 brwInitFragProgFuncs( functions );
349 brw_init_common_queryobj_functions(functions);
350 if (devinfo->gen >= 8 || devinfo->is_haswell)
351 hsw_init_queryobj_functions(functions);
352 else if (devinfo->gen >= 6)
353 gen6_init_queryobj_functions(functions);
354 else
355 gen4_init_queryobj_functions(functions);
356 brw_init_compute_functions(functions);
357 brw_init_conditional_render_functions(functions);
358
359 functions->GenerateMipmap = brw_generate_mipmap;
360
361 functions->QueryInternalFormat = brw_query_internal_format;
362
363 functions->NewTransformFeedback = brw_new_transform_feedback;
364 functions->DeleteTransformFeedback = brw_delete_transform_feedback;
365 if (can_do_mi_math_and_lrr(brw->screen)) {
366 functions->BeginTransformFeedback = hsw_begin_transform_feedback;
367 functions->EndTransformFeedback = hsw_end_transform_feedback;
368 functions->PauseTransformFeedback = hsw_pause_transform_feedback;
369 functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
370 } else if (devinfo->gen >= 7) {
371 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
372 functions->EndTransformFeedback = gen7_end_transform_feedback;
373 functions->PauseTransformFeedback = gen7_pause_transform_feedback;
374 functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
375 functions->GetTransformFeedbackVertexCount =
376 brw_get_transform_feedback_vertex_count;
377 } else {
378 functions->BeginTransformFeedback = brw_begin_transform_feedback;
379 functions->EndTransformFeedback = brw_end_transform_feedback;
380 functions->PauseTransformFeedback = brw_pause_transform_feedback;
381 functions->ResumeTransformFeedback = brw_resume_transform_feedback;
382 functions->GetTransformFeedbackVertexCount =
383 brw_get_transform_feedback_vertex_count;
384 }
385
386 if (devinfo->gen >= 6)
387 functions->GetSamplePosition = gen6_get_sample_position;
388
389 /* GL_ARB_get_program_binary */
390 brw_program_binary_init(brw->screen->deviceID);
391 functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
392 functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
393 functions->ProgramBinaryDeserializeDriverBlob =
394 brw_deserialize_program_binary;
395
396 if (brw->screen->disk_cache) {
397 functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
398 }
399
400 functions->SetBackgroundContext = brw_set_background_context;
401 }
402
403 static void
404 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
405 {
406 const struct gen_device_info *devinfo = &brw->screen->devinfo;
407 struct gl_context *ctx = &brw->ctx;
408
409 /* The following SPIR-V capabilities are only supported on gen7+. In theory
410 * you should enable the extension only on gen7+, but just in case let's
411 * assert it.
412 */
413 assert(devinfo->gen >= 7);
414
415 ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
416 ctx->Const.SpirVCapabilities.draw_parameters = true;
417 ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
418 ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
419 ctx->Const.SpirVCapabilities.image_write_without_format = true;
420 ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
421 ctx->Const.SpirVCapabilities.tessellation = true;
422 ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
423 ctx->Const.SpirVCapabilities.variable_pointers = true;
424 }
425
426 static void
427 brw_initialize_context_constants(struct brw_context *brw)
428 {
429 const struct gen_device_info *devinfo = &brw->screen->devinfo;
430 struct gl_context *ctx = &brw->ctx;
431 const struct brw_compiler *compiler = brw->screen->compiler;
432
433 const bool stage_exists[MESA_SHADER_STAGES] = {
434 [MESA_SHADER_VERTEX] = true,
435 [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
436 [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
437 [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
438 [MESA_SHADER_FRAGMENT] = true,
439 [MESA_SHADER_COMPUTE] =
440 (_mesa_is_desktop_gl(ctx) &&
441 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
442 (ctx->API == API_OPENGLES2 &&
443 ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
444 };
445
446 unsigned num_stages = 0;
447 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
448 if (stage_exists[i])
449 num_stages++;
450 }
451
452 unsigned max_samplers =
453 devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;
454
455 ctx->Const.MaxDualSourceDrawBuffers = 1;
456 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
457 ctx->Const.MaxCombinedShaderOutputResources =
458 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
459
460 /* The timestamp register we can read for glGetTimestamp() is
461 * sometimes only 32 bits, before scaling to nanoseconds (depending
462 * on kernel).
463 *
464 * Once scaled to nanoseconds the timestamp would roll over at a
465 * non-power-of-two, so an application couldn't use
466 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
467 * report 36 bits and truncate at that (rolling over 5 times as
468 * often as the HW counter), and when the 32-bit counter rolls
469 * over, it happens to also be at a rollover in the reported value
470 * from near (1<<36) to 0.
471 *
472 * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
473 * rolls over every ~69 seconds.
474 */
475 ctx->Const.QueryCounterBits.Timestamp = 36;
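   /* Rough arithmetic behind the figures above, assuming the common 80 ns
    * timestamp tick (an assumption, but consistent with the ~343 s number):
    *
    *    2^32 ticks * 80 ns/tick ~= 343.6 s   (raw 32-bit HW counter)
    *    2^36 ns                 ~= 68.7 s    (reported 36-bit ns value)
    *
    * 343.6 / 68.7 ~= 5, matching the "5 times as often" note.
    */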
476
477 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
478 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
479 if (devinfo->gen >= 7) {
480 ctx->Const.MaxRenderbufferSize = 16384;
481 ctx->Const.MaxTextureSize = 16384;
482 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
483 } else {
484 ctx->Const.MaxRenderbufferSize = 8192;
485 ctx->Const.MaxTextureSize = 8192;
486 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
487 }
488 ctx->Const.Max3DTextureLevels = 12; /* 2048 */
489 ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
490 ctx->Const.MaxTextureMbytes = 1536;
491 ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
492 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
493 ctx->Const.MaxTextureLodBias = 15.0;
494 ctx->Const.StripTextureBorder = true;
495 if (devinfo->gen >= 7) {
496 ctx->Const.MaxProgramTextureGatherComponents = 4;
497 ctx->Const.MinProgramTextureGatherOffset = -32;
498 ctx->Const.MaxProgramTextureGatherOffset = 31;
499 } else if (devinfo->gen == 6) {
500 ctx->Const.MaxProgramTextureGatherComponents = 1;
501 ctx->Const.MinProgramTextureGatherOffset = -8;
502 ctx->Const.MaxProgramTextureGatherOffset = 7;
503 }
504
505 ctx->Const.MaxUniformBlockSize = 65536;
506
507 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
508 struct gl_program_constants *prog = &ctx->Const.Program[i];
509
510 if (!stage_exists[i])
511 continue;
512
513 prog->MaxTextureImageUnits = max_samplers;
514
515 prog->MaxUniformBlocks = BRW_MAX_UBO;
516 prog->MaxCombinedUniformComponents =
517 prog->MaxUniformComponents +
518 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
519
520 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
521 prog->MaxAtomicBuffers = BRW_MAX_ABO;
522 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
523 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
524 }
525
526 ctx->Const.MaxTextureUnits =
527 MIN2(ctx->Const.MaxTextureCoordUnits,
528 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
529
530 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
531 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
532 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
533 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
534 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
535 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
536 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
537
538
539 /* Hardware only supports a limited number of transform feedback buffers.
540 * So we need to override the Mesa default (which is based only on software
541 * limits).
542 */
543 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
544
545 /* On Gen6, in the worst case, we use up one binding table entry per
546 * transform feedback component (see comments above the definition of
547 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
548 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
549 * BRW_MAX_SOL_BINDINGS.
550 *
551 * In "separate components" mode, we need to divide this value by
552 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
553 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
554 */
555 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
556 ctx->Const.MaxTransformFeedbackSeparateComponents =
557 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
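   /* Worked example, assuming the brw_context.h values are 64 bindings and
    * 4 SO buffers (treat those numbers as assumptions here): interleaved
    * mode advertises 64 components, separate mode 64 / 4 = 16 per buffer,
    * so even with all four buffers bound the 64-entry binding table budget
    * is never exceeded.
    */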
558
559 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
560 !can_do_mi_math_and_lrr(brw->screen);
561
562 int max_samples;
563 const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
564 const int clamp_max_samples =
565 driQueryOptioni(&brw->optionCache, "clamp_max_samples");
566
567 if (clamp_max_samples < 0) {
568 max_samples = msaa_modes[0];
569 } else {
570 /* Select the largest supported MSAA mode that does not exceed
571 * clamp_max_samples.
572 */
573 max_samples = 0;
574 for (int i = 0; msaa_modes[i] != 0; ++i) {
575 if (msaa_modes[i] <= clamp_max_samples) {
576 max_samples = msaa_modes[i];
577 break;
578 }
579 }
580 }
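   /* Example of the clamping above, with a hypothetical mode list: if
    * intel_supported_msaa_modes() returned {8, 4, 0} and drirc set
    * clamp_max_samples=6, the loop would skip 8 and settle on 4. A clamp
    * below every supported mode leaves max_samples at 0.
    */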
581
582 ctx->Const.MaxSamples = max_samples;
583 ctx->Const.MaxColorTextureSamples = max_samples;
584 ctx->Const.MaxDepthTextureSamples = max_samples;
585 ctx->Const.MaxIntegerSamples = max_samples;
586 ctx->Const.MaxImageSamples = 0;
587
588 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
589 * to map indices of rectangular grid to sample numbers within a pixel.
590 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
591 * extension implementation. For more details see the comment above
592 * gen6_set_sample_maps() definition.
593 */
594 gen6_set_sample_maps(ctx);
595
596 ctx->Const.MinLineWidth = 1.0;
597 ctx->Const.MinLineWidthAA = 1.0;
598 if (devinfo->gen >= 6) {
599 ctx->Const.MaxLineWidth = 7.375;
600 ctx->Const.MaxLineWidthAA = 7.375;
601 ctx->Const.LineWidthGranularity = 0.125;
602 } else {
603 ctx->Const.MaxLineWidth = 7.0;
604 ctx->Const.MaxLineWidthAA = 7.0;
605 ctx->Const.LineWidthGranularity = 0.5;
606 }
607
608 /* For non-antialiased lines, we have to round the line width to the
609 * nearest whole number. Make sure that we don't advertise a line
610 * width that, when rounded, will be beyond the actual hardware
611 * maximum.
612 */
613 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
614
615 ctx->Const.MinPointSize = 1.0;
616 ctx->Const.MinPointSizeAA = 1.0;
617 ctx->Const.MaxPointSize = 255.0;
618 ctx->Const.MaxPointSizeAA = 255.0;
619 ctx->Const.PointSizeGranularity = 1.0;
620
621 if (devinfo->gen >= 5 || devinfo->is_g4x)
622 ctx->Const.MaxClipPlanes = 8;
623
624 ctx->Const.GLSLFragCoordIsSysVal = true;
625 ctx->Const.GLSLFrontFacingIsSysVal = true;
626 ctx->Const.GLSLTessLevelsAsInputs = true;
627 ctx->Const.PrimitiveRestartForPatches = true;
628
629 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
630 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
631 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
632 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
633 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
634 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
635 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
636 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
637 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
638 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
639 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
640 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
641 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
642 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
643
644 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
645 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
646 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
647 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
648 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
649 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
650 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
651 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
652 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
653 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
654 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
655
656 /* Fragment shaders use real, 32-bit twos-complement integers for all
657 * integer types.
658 */
659 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
660 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
661 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
662 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
663 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
664
665 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
666 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
667 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
668 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
669 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
670
671    /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
672     * but we're not sure how it handles vertex order, which affects the
673     * provoking vertex decision. Always use the last-vertex convention for
674     * quad primitives, which works as expected for now.
675 */
676 if (devinfo->gen >= 6)
677 ctx->Const.QuadsFollowProvokingVertexConvention = false;
678
679 ctx->Const.NativeIntegers = true;
680
681 /* Regarding the CMP instruction, the Ivybridge PRM says:
682 *
683 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
684 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
685 * 0xFFFFFFFF) is assigned to dst."
686 *
687 * but PRMs for earlier generations say
688 *
689 * "In dword format, one GRF may store up to 8 results. When the register
690 * is used later as a vector of Booleans, as only LSB at each channel
691 * contains meaning [sic] data, software should make sure all higher bits
692 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
693 *
694 * We select the representation of a true boolean uniform to be ~0, and fix
695     * the results of Gen <= 5 CMP instructions with -(result & 1).
696 */
697 ctx->Const.UniformBooleanTrue = ~0;
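   /* Worked example of that fixup: if a Gen5 CMP leaves only the LSB
    * meaningful, a "true" result of 0x00000001 becomes
    * -(0x00000001 & 1) = 0xFFFFFFFF, i.e. the ~0 representation chosen
    * here, while a "false" 0x00000000 stays 0.
    */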
698
699 /* From the gen4 PRM, volume 4 page 127:
700 *
701 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
702 * the base address of the first element of the surface, computed in
703 * software by adding the surface base address to the byte offset of
704 * the element in the buffer."
705 *
706 * However, unaligned accesses are slower, so enforce buffer alignment.
707 *
708 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
709 * restriction: the start of the buffer needs to be 32B aligned.
710 */
711 ctx->Const.UniformBufferOffsetAlignment = 32;
712
713 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
714 * that we can safely have the CPU and GPU writing the same SSBO on
715 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
716 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
717 * be updating disjoint regions of the buffer simultaneously and that will
718 * break if the regions overlap the same cacheline.
719 */
720 ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
721 ctx->Const.TextureBufferOffsetAlignment = 16;
722 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
723
724 if (devinfo->gen >= 6) {
725 ctx->Const.MaxVarying = 32;
726 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
727 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
728 compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
729 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
730 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
731 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
732 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
733 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
734 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
735 }
736
737 /* We want the GLSL compiler to emit code that uses condition codes */
738 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
739 ctx->Const.ShaderCompilerOptions[i] =
740 brw->screen->compiler->glsl_compiler_options[i];
741 }
742
743 if (devinfo->gen >= 7) {
744 ctx->Const.MaxViewportWidth = 32768;
745 ctx->Const.MaxViewportHeight = 32768;
746 }
747
748 /* ARB_viewport_array, OES_viewport_array */
749 if (devinfo->gen >= 6) {
750 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
751 ctx->Const.ViewportSubpixelBits = 8;
752
753 /* Cast to float before negating because MaxViewportWidth is unsigned.
754 */
755 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
756 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
757 }
758
759 /* ARB_gpu_shader5 */
760 if (devinfo->gen >= 7)
761 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
762
763 /* ARB_framebuffer_no_attachments */
764 ctx->Const.MaxFramebufferWidth = 16384;
765 ctx->Const.MaxFramebufferHeight = 16384;
766 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
767 ctx->Const.MaxFramebufferSamples = max_samples;
768
769 /* OES_primitive_bounding_box */
770 ctx->Const.NoPrimitiveBoundingBoxOutput = true;
771
772 /* TODO: We should be able to use STD430 packing by default on all hardware
773 * but some piglit tests [1] currently fail on SNB when this is enabled.
774     * The problem is that the message we use for uniform pulls in the
775     * vec4 back-end on SNB is the OWORD block load instruction, which
776 * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the
777 * sampler which doesn't have these restrictions.
778 *
779 * In the scalar back-end, we use the sampler for dynamic uniform loads and
780 * pull an entire cache line at a time for constant offset loads both of
781 * which support almost any alignment.
782 *
783 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
784 */
785 if (devinfo->gen >= 7)
786 ctx->Const.UseSTD430AsDefaultPacking = true;
787
788 if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
789 ctx->Const.AllowMappedBuffersDuringExecution = true;
790
791 /* GL_ARB_get_program_binary */
792 ctx->Const.NumProgramBinaryFormats = 1;
793 }
794
795 static void
796 brw_initialize_cs_context_constants(struct brw_context *brw)
797 {
798 struct gl_context *ctx = &brw->ctx;
799 const struct intel_screen *screen = brw->screen;
800 struct gen_device_info *devinfo = &brw->screen->devinfo;
801
802 /* FINISHME: Do this for all platforms that the kernel supports */
803 if (devinfo->is_cherryview &&
804 screen->subslice_total > 0 && screen->eu_total > 0) {
805 /* Logical CS threads = EUs per subslice * 7 threads per EU */
806 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
807
808 /* Fuse configurations may give more threads than expected, never less. */
809 if (max_cs_threads > devinfo->max_cs_threads)
810 devinfo->max_cs_threads = max_cs_threads;
811 }
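   /* Hypothetical numbers for the formula above: a fused part reporting
    * 16 EUs across 2 subslices would yield (16 / 2) * 7 = 56 logical CS
    * threads, and that value only ever raises devinfo->max_cs_threads.
    */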
812
813 /* Maximum number of scalar compute shader invocations that can be run in
814 * parallel in the same subslice assuming SIMD32 dispatch.
815 *
816 * We don't advertise more than 64 threads, because we are limited to 64 by
817 * our usage of thread_width_max in the gpgpu walker command. This only
818 * currently impacts Haswell, which otherwise might be able to advertise 70
819 * threads. With SIMD32 and 64 threads, Haswell still provides twice the
820     * number of invocations required for ARB_compute_shader.
821 */
822 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
823 const uint32_t max_invocations = 32 * max_threads;
824 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
825 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
826 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
827 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
828 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
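   /* Back-of-the-envelope check: with the 64-thread cap above and SIMD32
    * dispatch, max_invocations = 32 * 64 = 2048, comfortably above the
    * minimum of 1024 work-group invocations required by ARB_compute_shader
    * / GL 4.3.
    */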
829 }
830
831 /**
832 * Process driconf (drirc) options, setting appropriate context flags.
833 *
834 * intelInitExtensions still pokes at optionCache directly, in order to
835 * avoid advertising various extensions. No flags are set, so it makes
836 * sense to continue doing that there.
837 */
838 static void
839 brw_process_driconf_options(struct brw_context *brw)
840 {
841 const struct gen_device_info *devinfo = &brw->screen->devinfo;
842 struct gl_context *ctx = &brw->ctx;
843
844 driOptionCache *options = &brw->optionCache;
845 driParseConfigFiles(options, &brw->screen->optionCache,
846 brw->driContext->driScreenPriv->myNum,
847 "i965", NULL, NULL, 0);
848
849 if (INTEL_DEBUG & DEBUG_NO_HIZ) {
850 brw->has_hiz = false;
851 /* On gen6, you can only do separate stencil with HIZ. */
852 if (devinfo->gen == 6)
853 brw->has_separate_stencil = false;
854 }
855
856 if (driQueryOptionb(options, "mesa_no_error"))
857 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
858
859 if (driQueryOptionb(options, "always_flush_batch")) {
860 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
861 brw->always_flush_batch = true;
862 }
863
864 if (driQueryOptionb(options, "always_flush_cache")) {
865 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
866 brw->always_flush_cache = true;
867 }
868
869 if (driQueryOptionb(options, "disable_throttling")) {
870 fprintf(stderr, "disabling flush throttling\n");
871 brw->disable_throttling = true;
872 }
873
874 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
875
876 if (driQueryOptionb(&brw->optionCache, "precise_trig"))
877 brw->screen->compiler->precise_trig = true;
878
879 ctx->Const.ForceGLSLExtensionsWarn =
880 driQueryOptionb(options, "force_glsl_extensions_warn");
881
882 ctx->Const.ForceGLSLVersion =
883 driQueryOptioni(options, "force_glsl_version");
884
885 ctx->Const.DisableGLSLLineContinuations =
886 driQueryOptionb(options, "disable_glsl_line_continuations");
887
888 ctx->Const.AllowGLSLExtensionDirectiveMidShader =
889 driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
890
891 ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
892 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
893
894 ctx->Const.AllowHigherCompatVersion =
895 driQueryOptionb(options, "allow_higher_compat_version");
896
897 ctx->Const.ForceGLSLAbsSqrt =
898 driQueryOptionb(options, "force_glsl_abs_sqrt");
899
900 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
901
902 brw->dual_color_blend_by_location =
903 driQueryOptionb(options, "dual_color_blend_by_location");
904
905 ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
906 driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
907
908 ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
909 driComputeOptionsSha1(&brw->screen->optionCache,
910 ctx->Const.dri_config_options_sha1);
911 }
912
913 GLboolean
914 brwCreateContext(gl_api api,
915 const struct gl_config *mesaVis,
916 __DRIcontext *driContextPriv,
917 const struct __DriverContextConfig *ctx_config,
918 unsigned *dri_ctx_error,
919 void *sharedContextPrivate)
920 {
921 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
922 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
923 const struct gen_device_info *devinfo = &screen->devinfo;
924 struct dd_function_table functions;
925
926 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
927 * provides us with context reset notifications.
928 */
929 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
930 __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
931 __DRI_CTX_FLAG_NO_ERROR;
932
933 if (screen->has_context_reset_notification)
934 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
935
936 if (ctx_config->flags & ~allowed_flags) {
937 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
938 return false;
939 }
940
941 if (ctx_config->attribute_mask &
942 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
943 __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
944 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
945 return false;
946 }
947
948 bool notify_reset =
949 ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
950 ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
951
952 struct brw_context *brw = rzalloc(NULL, struct brw_context);
953 if (!brw) {
954 fprintf(stderr, "%s: failed to alloc context\n", __func__);
955 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
956 return false;
957 }
958 brw->perf_ctx = gen_perf_new_context(brw);
959
960 driContextPriv->driverPrivate = brw;
961 brw->driContext = driContextPriv;
962 brw->screen = screen;
963 brw->bufmgr = screen->bufmgr;
964
965 brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
966 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
967
968 brw->has_swizzling = screen->hw_has_swizzling;
969
970 brw->isl_dev = screen->isl_dev;
971
972 brw->vs.base.stage = MESA_SHADER_VERTEX;
973 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
974 brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
975 brw->gs.base.stage = MESA_SHADER_GEOMETRY;
976 brw->wm.base.stage = MESA_SHADER_FRAGMENT;
977 brw->cs.base.stage = MESA_SHADER_COMPUTE;
978
979 brw_init_driver_functions(brw, &functions);
980
981 if (notify_reset)
982 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
983
984 brw_process_driconf_options(brw);
985
986 if (api == API_OPENGL_CORE &&
987 driQueryOptionb(&screen->optionCache, "force_compat_profile")) {
988 api = API_OPENGL_COMPAT;
989 }
990
991 struct gl_context *ctx = &brw->ctx;
992
993 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
994 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
995 fprintf(stderr, "%s: failed to init mesa context\n", __func__);
996 intelDestroyContext(driContextPriv);
997 return false;
998 }
999
1000 driContextSetFlags(ctx, ctx_config->flags);
1001
1002 /* Initialize the software rasterizer and helper modules.
1003 *
1004 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1005 * software fallbacks (which we have to support on legacy GL to do weird
1006 * glDrawPixels(), glBitmap(), and other functions).
1007 */
1008 if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1009 _swrast_CreateContext(ctx);
1010 }
1011
1012 _vbo_CreateContext(ctx);
1013 if (ctx->swrast_context) {
1014 _tnl_CreateContext(ctx);
1015 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1016 _swsetup_CreateContext(ctx);
1017
1018 /* Configure swrast to match hardware characteristics: */
1019 _swrast_allow_pixel_fog(ctx, false);
1020 _swrast_allow_vertex_fog(ctx, true);
1021 }
1022
1023 _mesa_meta_init(ctx);
1024
1025 if (INTEL_DEBUG & DEBUG_PERF)
1026 brw->perf_debug = true;
1027
1028 brw_initialize_cs_context_constants(brw);
1029 brw_initialize_context_constants(brw);
1030
1031 ctx->Const.ResetStrategy = notify_reset
1032 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1033
1034 /* Reinitialize the context point state. It depends on ctx->Const values. */
1035 _mesa_init_point(ctx);
1036
1037 intel_fbo_init(brw);
1038
1039 intel_batchbuffer_init(brw);
1040
1041 /* Create a new hardware context. Using a hardware context means that
1042 * our GPU state will be saved/restored on context switch, allowing us
1043 * to assume that the GPU is in the same state we left it in.
1044 *
1045 * This is required for transform feedback buffer offsets, query objects,
1046 * and also allows us to reduce how much state we have to emit.
1047 */
1048 brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1049 if (!brw->hw_ctx && devinfo->gen >= 6) {
1050 fprintf(stderr, "Failed to create hardware context.\n");
1051 intelDestroyContext(driContextPriv);
1052 return false;
1053 }
1054
1055 if (brw->hw_ctx) {
1056 int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1057 if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1058 switch (ctx_config->priority) {
1059 case __DRI_CTX_PRIORITY_LOW:
1060 hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1061 break;
1062 case __DRI_CTX_PRIORITY_HIGH:
1063 hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1064 break;
1065 }
1066 }
1067 if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1068 brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1069 fprintf(stderr,
1070 "Failed to set priority [%d:%d] for hardware context.\n",
1071 ctx_config->priority, hw_priority);
1072 intelDestroyContext(driContextPriv);
1073 return false;
1074 }
1075 }
1076
1077 if (brw_init_pipe_control(brw, devinfo)) {
1078 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1079 intelDestroyContext(driContextPriv);
1080 return false;
1081 }
1082
1083 brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1084
1085 brw_init_state(brw);
1086
1087 intelInitExtensions(ctx);
1088
1089 brw_init_surface_formats(brw);
1090
1091 brw_blorp_init(brw);
1092
1093 brw->urb.size = devinfo->urb.size;
1094
1095 if (devinfo->gen == 6)
1096 brw->urb.gs_present = false;
1097
1098 brw->prim_restart.in_progress = false;
1099 brw->prim_restart.enable_cut_index = false;
1100 brw->gs.enabled = false;
1101 brw->clip.viewport_count = 1;
1102
1103 brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1104
1105 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1106
1107 ctx->VertexProgram._MaintainTnlProgram = true;
1108 ctx->FragmentProgram._MaintainTexEnvProgram = true;
1109
1110 brw_draw_init( brw );
1111
1112 if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1113 /* Turn on some extra GL_ARB_debug_output generation. */
1114 brw->perf_debug = true;
1115 }
1116
1117 if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1118 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1119 ctx->Const.RobustAccess = GL_TRUE;
1120 }
1121
1122 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1123 brw_init_shader_time(brw);
1124
1125 _mesa_override_extensions(ctx);
1126 _mesa_compute_version(ctx);
1127
1128 /* GL_ARB_gl_spirv */
1129 if (ctx->Extensions.ARB_gl_spirv) {
1130 brw_initialize_spirv_supported_capabilities(brw);
1131
1132 if (ctx->Extensions.ARB_spirv_extensions) {
1133 /* GL_ARB_spirv_extensions */
1134 ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
1135 _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
1136 &ctx->Const.SpirVCapabilities);
1137 }
1138 }
1139
1140 _mesa_initialize_dispatch_tables(ctx);
1141 _mesa_initialize_vbo_vtxfmt(ctx);
1142
1143 if (ctx->Extensions.INTEL_performance_query)
1144 brw_init_performance_queries(brw);
1145
1146 vbo_use_buffer_objects(ctx);
1147 vbo_always_unmap_buffers(ctx);
1148
1149 brw->ctx.Cache = brw->screen->disk_cache;
1150
1151 if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1152 driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1153 /* Loader supports multithreading, and so do we. */
1154 _mesa_glthread_init(ctx);
1155 }
1156
1157 return true;
1158 }
1159
1160 void
1161 intelDestroyContext(__DRIcontext * driContextPriv)
1162 {
1163 struct brw_context *brw =
1164 (struct brw_context *) driContextPriv->driverPrivate;
1165 struct gl_context *ctx = &brw->ctx;
1166
1167 GET_CURRENT_CONTEXT(curctx);
1168
1169 if (curctx == NULL) {
1170 /* No current context, but we need one to release
1171 * renderbuffer surface when we release framebuffer.
1172 * So temporarily bind the context.
1173 */
1174 _mesa_make_current(ctx, NULL, NULL);
1175 }
1176
1177 _mesa_glthread_destroy(&brw->ctx);
1178
1179 _mesa_meta_free(&brw->ctx);
1180
1181 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1182 /* Force a report. */
1183 brw->shader_time.report_time = 0;
1184
1185 brw_collect_and_report_shader_time(brw);
1186 brw_destroy_shader_time(brw);
1187 }
1188
1189 blorp_finish(&brw->blorp);
1190
1191 brw_destroy_state(brw);
1192 brw_draw_destroy(brw);
1193
1194 brw_bo_unreference(brw->curbe.curbe_bo);
1195
1196 brw_bo_unreference(brw->vs.base.scratch_bo);
1197 brw_bo_unreference(brw->tcs.base.scratch_bo);
1198 brw_bo_unreference(brw->tes.base.scratch_bo);
1199 brw_bo_unreference(brw->gs.base.scratch_bo);
1200 brw_bo_unreference(brw->wm.base.scratch_bo);
1201
1202 brw_bo_unreference(brw->vs.base.push_const_bo);
1203 brw_bo_unreference(brw->tcs.base.push_const_bo);
1204 brw_bo_unreference(brw->tes.base.push_const_bo);
1205 brw_bo_unreference(brw->gs.base.push_const_bo);
1206 brw_bo_unreference(brw->wm.base.push_const_bo);
1207
1208 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1209
1210 if (ctx->swrast_context) {
1211 _swsetup_DestroyContext(&brw->ctx);
1212 _tnl_DestroyContext(&brw->ctx);
1213 }
1214 _vbo_DestroyContext(&brw->ctx);
1215
1216 if (ctx->swrast_context)
1217 _swrast_DestroyContext(&brw->ctx);
1218
1219 brw_fini_pipe_control(brw);
1220 intel_batchbuffer_free(&brw->batch);
1221
1222 brw_bo_unreference(brw->throttle_batch[1]);
1223 brw_bo_unreference(brw->throttle_batch[0]);
1224 brw->throttle_batch[1] = NULL;
1225 brw->throttle_batch[0] = NULL;
1226
1227 driDestroyOptionCache(&brw->optionCache);
1228
1229 /* free the Mesa context */
1230 _mesa_free_context_data(&brw->ctx, true);
1231
1232 ralloc_free(brw);
1233 driContextPriv->driverPrivate = NULL;
1234 }
1235
1236 GLboolean
1237 intelUnbindContext(__DRIcontext * driContextPriv)
1238 {
1239 struct gl_context *ctx = driContextPriv->driverPrivate;
1240 _mesa_glthread_finish(ctx);
1241
1242    /* Unset current context and dispatch table */
1243 _mesa_make_current(NULL, NULL, NULL);
1244
1245 return true;
1246 }
1247
1248 /**
1249  * Fixes up the context for GLES 2/3 with our default-to-sRGB-capable behavior
1250 * on window system framebuffers.
1251 *
1252 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1253 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1254 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1255 * for a visual where you're guaranteed to be capable, but it turns out that
1256 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1257 * incapable ones, because there's no difference between the two in resources
1258 * used. Applications thus get built that accidentally rely on the default
1259 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1260 * great...
1261 *
1262 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1263 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1264 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1265 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1266 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1267 * and get no sRGB encode (assuming that both kinds of visual are available).
1268 * Thus our choice to support sRGB by default on our visuals for desktop would
1269 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1270 *
1271 * Unfortunately, renderbuffer setup happens before a context is created. So
1272 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1273 * context (without an sRGB visual), we go turn that back off before anyone
1274 * finds out.
1275 */
1276 static void
1277 intel_gles3_srgb_workaround(struct brw_context *brw,
1278 struct gl_framebuffer *fb)
1279 {
1280 struct gl_context *ctx = &brw->ctx;
1281
1282 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1283 return;
1284
1285 for (int i = 0; i < BUFFER_COUNT; i++) {
1286 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1287
1288 /* Check if sRGB was specifically asked for. */
1289 struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1290 if (irb && irb->need_srgb)
1291 return;
1292
1293 if (rb)
1294 rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1295 }
1296 /* Disable sRGB from framebuffers that are not compatible. */
1297 fb->Visual.sRGBCapable = false;
1298 }
1299
1300 GLboolean
1301 intelMakeCurrent(__DRIcontext * driContextPriv,
1302 __DRIdrawable * driDrawPriv,
1303 __DRIdrawable * driReadPriv)
1304 {
1305 struct brw_context *brw;
1306
1307 if (driContextPriv)
1308 brw = (struct brw_context *) driContextPriv->driverPrivate;
1309 else
1310 brw = NULL;
1311
1312 if (driContextPriv) {
1313 struct gl_context *ctx = &brw->ctx;
1314 struct gl_framebuffer *fb, *readFb;
1315
1316 if (driDrawPriv == NULL) {
1317 fb = _mesa_get_incomplete_framebuffer();
1318 } else {
1319 fb = driDrawPriv->driverPrivate;
1320 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1321 }
1322
1323 if (driReadPriv == NULL) {
1324 readFb = _mesa_get_incomplete_framebuffer();
1325 } else {
1326 readFb = driReadPriv->driverPrivate;
1327 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1328 }
1329
1330 /* The sRGB workaround changes the renderbuffer's format. We must change
1331     * the format before the renderbuffer's miptree gets allocated, otherwise
1332 * the formats of the renderbuffer and its miptree will differ.
1333 */
1334 intel_gles3_srgb_workaround(brw, fb);
1335 intel_gles3_srgb_workaround(brw, readFb);
1336
1337 /* If the context viewport hasn't been initialized, force a call out to
1338 * the loader to get buffers so we have a drawable size for the initial
1339 * viewport. */
1340 if (!brw->ctx.ViewportInitialized)
1341 intel_prepare_render(brw);
1342
1343 _mesa_make_current(ctx, fb, readFb);
1344 } else {
1345 GET_CURRENT_CONTEXT(ctx);
1346 _mesa_glthread_finish(ctx);
1347 _mesa_make_current(NULL, NULL, NULL);
1348 }
1349
1350 return true;
1351 }
1352
1353 void
1354 intel_resolve_for_dri2_flush(struct brw_context *brw,
1355 __DRIdrawable *drawable)
1356 {
1357 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1358
1359 if (devinfo->gen < 6) {
1360 /* MSAA and fast color clear are not supported, so don't waste time
1361 * checking whether a resolve is needed.
1362 */
1363 return;
1364 }
1365
1366 struct gl_framebuffer *fb = drawable->driverPrivate;
1367 struct intel_renderbuffer *rb;
1368
1369 /* Usually, only the back buffer will need to be downsampled. However,
1370 * the front buffer will also need it if the user has rendered into it.
1371 */
1372 static const gl_buffer_index buffers[2] = {
1373 BUFFER_BACK_LEFT,
1374 BUFFER_FRONT_LEFT,
1375 };
1376
1377 for (int i = 0; i < 2; ++i) {
1378 rb = intel_get_renderbuffer(fb, buffers[i]);
1379 if (rb == NULL || rb->mt == NULL)
1380 continue;
1381 if (rb->mt->surf.samples == 1) {
1382 assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1383 rb->layer_count == 1);
1384 intel_miptree_prepare_external(brw, rb->mt);
1385 } else {
1386 intel_renderbuffer_downsample(brw, rb);
1387
1388 /* Call prepare_external on the single-sample miptree to do any
1389 * needed resolves prior to handing it off to the window system.
1390 * This is needed in the case that rb->singlesample_mt is Y-tiled
1391 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
1392 * this case, the MSAA resolve above will write compressed data into
1393 * rb->singlesample_mt.
1394 *
1395 * TODO: Some day, if we decide to care about the tiny performance
1396 * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1397 * we could detect this case and just allocate the single-sampled
1398 * miptree without aux. However, that would be a lot of plumbing and
1399 * this is a rather exotic case so it's not really worth it.
1400 */
1401 intel_miptree_prepare_external(brw, rb->singlesample_mt);
1402 }
1403 }
1404 }
1405
1406 static unsigned
1407 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1408 {
1409 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1410 }
1411
1412 static void
1413 intel_query_dri2_buffers(struct brw_context *brw,
1414 __DRIdrawable *drawable,
1415 __DRIbuffer **buffers,
1416 int *count);
1417
1418 static void
1419 intel_process_dri2_buffer(struct brw_context *brw,
1420 __DRIdrawable *drawable,
1421 __DRIbuffer *buffer,
1422 struct intel_renderbuffer *rb,
1423 const char *buffer_name);
1424
1425 static void
1426 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1427
1428 static void
1429 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1430 {
1431 struct gl_framebuffer *fb = drawable->driverPrivate;
1432 struct intel_renderbuffer *rb;
1433 __DRIbuffer *buffers = NULL;
1434 int count;
1435 const char *region_name;
1436
1437 /* Set this up front, so that in case our buffers get invalidated
1438 * while we're getting new buffers, we don't clobber the stamp and
1439 * thus ignore the invalidate. */
1440 drawable->lastStamp = drawable->dri2.stamp;
1441
1442 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1443 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1444
1445 intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1446
1447 if (buffers == NULL)
1448 return;
1449
1450 for (int i = 0; i < count; i++) {
1451 switch (buffers[i].attachment) {
1452 case __DRI_BUFFER_FRONT_LEFT:
1453 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1454 region_name = "dri2 front buffer";
1455 break;
1456
1457 case __DRI_BUFFER_FAKE_FRONT_LEFT:
1458 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1459 region_name = "dri2 fake front buffer";
1460 break;
1461
1462 case __DRI_BUFFER_BACK_LEFT:
1463 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1464 region_name = "dri2 back buffer";
1465 break;
1466
1467 case __DRI_BUFFER_DEPTH:
1468 case __DRI_BUFFER_HIZ:
1469 case __DRI_BUFFER_DEPTH_STENCIL:
1470 case __DRI_BUFFER_STENCIL:
1471 case __DRI_BUFFER_ACCUM:
1472 default:
1473 fprintf(stderr,
1474 "unhandled buffer attach event, attachment type %d\n",
1475 buffers[i].attachment);
1476 return;
1477 }
1478
1479 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1480 }
1481
1482 }
1483
1484 void
1485 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1486 {
1487 struct brw_context *brw = context->driverPrivate;
1488 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1489
1490 /* Set this up front, so that in case our buffers get invalidated
1491 * while we're getting new buffers, we don't clobber the stamp and
1492 * thus ignore the invalidate. */
1493 drawable->lastStamp = drawable->dri2.stamp;
1494
1495 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1496 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1497
1498 if (dri_screen->image.loader)
1499 intel_update_image_buffers(brw, drawable);
1500 else
1501 intel_update_dri2_buffers(brw, drawable);
1502
1503 driUpdateFramebufferSize(&brw->ctx, drawable);
1504 }
1505
1506 /**
1507  * intel_prepare_render should be called anywhere that current read/drawbuffer
1508 * state is required.
1509 */
1510 void
1511 intel_prepare_render(struct brw_context *brw)
1512 {
1513 struct gl_context *ctx = &brw->ctx;
1514 __DRIcontext *driContext = brw->driContext;
1515 __DRIdrawable *drawable;
1516
1517 drawable = driContext->driDrawablePriv;
1518 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1519 if (drawable->lastStamp != drawable->dri2.stamp)
1520 intel_update_renderbuffers(driContext, drawable);
1521 driContext->dri2.draw_stamp = drawable->dri2.stamp;
1522 }
1523
1524 drawable = driContext->driReadablePriv;
1525 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1526 if (drawable->lastStamp != drawable->dri2.stamp)
1527 intel_update_renderbuffers(driContext, drawable);
1528 driContext->dri2.read_stamp = drawable->dri2.stamp;
1529 }
1530
1531 /* If we're currently rendering to the front buffer, the rendering
1532 * that will happen next will probably dirty the front buffer. So
1533 * mark it as dirty here.
1534 */
1535 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1536 brw->front_buffer_dirty = true;
1537
1538 if (brw->is_shared_buffer_bound) {
1539 /* Subsequent rendering will probably dirty the shared buffer. */
1540 brw->is_shared_buffer_dirty = true;
1541 }
1542 }
1543
1544 /**
1545 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1546 *
1547 * To determine which DRI buffers to request, examine the renderbuffers
1548 * attached to the drawable's framebuffer. Then request the buffers with
1549 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1550 *
1551 * This is called from intel_update_renderbuffers().
1552 *
1553 * \param drawable Drawable whose buffers are queried.
1554 * \param buffers [out] List of buffers returned by DRI2 query.
1555 * \param buffer_count [out] Number of buffers returned.
1556 *
1557 * \see intel_update_renderbuffers()
1558 * \see DRI2GetBuffers()
1559 * \see DRI2GetBuffersWithFormat()
1560 */
1561 static void
1562 intel_query_dri2_buffers(struct brw_context *brw,
1563 __DRIdrawable *drawable,
1564 __DRIbuffer **buffers,
1565 int *buffer_count)
1566 {
1567 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1568 struct gl_framebuffer *fb = drawable->driverPrivate;
1569 int i = 0;
1570 unsigned attachments[8];
1571
1572 struct intel_renderbuffer *front_rb;
1573 struct intel_renderbuffer *back_rb;
1574
1575 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1576 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1577
1578 memset(attachments, 0, sizeof(attachments));
1579 if ((_mesa_is_front_buffer_drawing(fb) ||
1580 _mesa_is_front_buffer_reading(fb) ||
1581 !back_rb) && front_rb) {
1582 /* If a fake front buffer is in use, then querying for
1583 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1584 * the real front buffer to the fake front buffer. So before doing the
1585 * query, we need to make sure all the pending drawing has landed in the
1586 * real front buffer.
1587 */
1588 intel_batchbuffer_flush(brw);
1589 intel_flush_front(&brw->ctx);
1590
1591 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1592 attachments[i++] = intel_bits_per_pixel(front_rb);
1593 } else if (front_rb && brw->front_buffer_dirty) {
1594 /* We have pending front buffer rendering, but we aren't querying for a
1595 * front buffer. If the front buffer we have is a fake front buffer,
1596 * the X server is going to throw it away when it processes the query.
1597 * So before doing the query, make sure all the pending drawing has
1598 * landed in the real front buffer.
1599 */
1600 intel_batchbuffer_flush(brw);
1601 intel_flush_front(&brw->ctx);
1602 }
1603
1604 if (back_rb) {
1605 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1606 attachments[i++] = intel_bits_per_pixel(back_rb);
1607 }
1608
1609 assert(i <= ARRAY_SIZE(attachments));
1610
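   /* The attachments array holds (attachment, bits-per-pixel) pairs, so the
    * attachment count handed to the loader is i / 2.
    */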
1611 *buffers =
1612 dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1613 &drawable->w,
1614 &drawable->h,
1615 attachments, i / 2,
1616 buffer_count,
1617 drawable->loaderPrivate);
1618 }
1619
1620 /**
1621 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1622 *
1623 * This is called from intel_update_renderbuffers().
1624 *
1625 * \par Note:
1626 * DRI buffers whose attachment point is DRI2BufferStencil or
1627 * DRI2BufferDepthStencil are handled as special cases.
1628 *
1629 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1630 * that is passed to brw_bo_gem_create_from_name().
1631 *
1632 * \see intel_update_renderbuffers()
1633 */
1634 static void
1635 intel_process_dri2_buffer(struct brw_context *brw,
1636 __DRIdrawable *drawable,
1637 __DRIbuffer *buffer,
1638 struct intel_renderbuffer *rb,
1639 const char *buffer_name)
1640 {
1641 struct gl_framebuffer *fb = drawable->driverPrivate;
1642 struct brw_bo *bo;
1643
1644 if (!rb)
1645 return;
1646
1647 unsigned num_samples = rb->Base.Base.NumSamples;
1648
1649 /* We try to avoid closing and reopening the same BO name, because the first
1650 * use of a mapping of the buffer involves a bunch of page faulting which is
1651 * moderately expensive.
1652 */
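   /* The window-system BO backs rb->mt for single-sampled drawables and
    * rb->singlesample_mt (the resolve target) for multisampled ones.
    */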
1653 struct intel_mipmap_tree *last_mt;
1654 if (num_samples == 0)
1655 last_mt = rb->mt;
1656 else
1657 last_mt = rb->singlesample_mt;
1658
1659 uint32_t old_name = 0;
1660 if (last_mt) {
1661 /* The bo already has a name because the miptree was created by a
1662 * previous call to intel_process_dri2_buffer(). If a bo already has a
1663 * name, then brw_bo_flink() is a low-cost getter. It does not
1664 * create a new name.
1665 */
1666 brw_bo_flink(last_mt->bo, &old_name);
1667 }
1668
1669 if (old_name == buffer->name)
1670 return;
1671
1672 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1673 fprintf(stderr,
1674 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1675 buffer->name, buffer->attachment,
1676 buffer->cpp, buffer->pitch);
1677 }
1678
1679 bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1680 buffer->name);
1681 if (!bo) {
1682 fprintf(stderr,
1683 "Failed to open BO for returned DRI2 buffer "
1684 "(%dx%d, %s, named %d).\n"
1685 "This is likely a bug in the X Server that will lead to a "
1686 "crash soon.\n",
1687 drawable->w, drawable->h, buffer_name, buffer->name);
1688 return;
1689 }
1690
1691 uint32_t tiling, swizzle;
1692 brw_bo_get_tiling(bo, &tiling, &swizzle);
1693
1694 struct intel_mipmap_tree *mt =
1695 intel_miptree_create_for_bo(brw,
1696 bo,
1697 intel_rb_format(rb),
1698 0,
1699 drawable->w,
1700 drawable->h,
1701 1,
1702 buffer->pitch,
1703 isl_tiling_from_i915_tiling(tiling),
1704 MIPTREE_CREATE_DEFAULT);
1705 if (!mt) {
1706 brw_bo_unreference(bo);
1707 return;
1708 }
1709
1710    /* We got this BO from X11. We can't assume that we have coherent texture
1711 * access because X may suddenly decide to use it for scan-out which would
1712 * destroy coherency.
1713 */
1714 bo->cache_coherent = false;
1715
1716 if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1717 drawable->w, drawable->h,
1718 buffer->pitch)) {
1719 brw_bo_unreference(bo);
1720 intel_miptree_release(&mt);
1721 return;
1722 }
1723
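   /* For front-buffer drawing into a multisampled drawable, upsample the
    * newly attached single-sampled contents into the MSAA miptree so the
    * existing front buffer contents are not lost.
    */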
1724 if (_mesa_is_front_buffer_drawing(fb) &&
1725 (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1726 buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1727 rb->Base.Base.NumSamples > 1) {
1728 intel_renderbuffer_upsample(brw, rb);
1729 }
1730
1731 assert(rb->mt);
1732
1733 brw_bo_unreference(bo);
1734 }
1735
1736 /**
1737  * \brief Attach a DRI image to a renderbuffer.
1738  *
1739  * Wrap the __DRIimage's buffer object in a miptree and install it as the
1740  * renderbuffer's window-system buffer. Shared buffers are made shareable,
1741  * since the compositor or display hardware may access them while we render.
1742  *
1743  * This is called from intel_update_image_buffers().
1744  *
1745  * \param drawable Drawable whose buffer is being updated.
1746  * \param rb Renderbuffer to update.
1747  * \param buffer Image returned by the image loader.
1748  * \param buffer_type Front, back, or shared buffer.
1749  * \see intel_update_image_buffers()
1750  */
1751
1752 static void
1753 intel_update_image_buffer(struct brw_context *intel,
1754 __DRIdrawable *drawable,
1755 struct intel_renderbuffer *rb,
1756 __DRIimage *buffer,
1757 enum __DRIimageBufferMask buffer_type)
1758 {
1759 struct gl_framebuffer *fb = drawable->driverPrivate;
1760
1761 if (!rb || !buffer->bo)
1762 return;
1763
1764 unsigned num_samples = rb->Base.Base.NumSamples;
1765
1766 /* Check and see if we're already bound to the right
1767 * buffer object
1768 */
1769 struct intel_mipmap_tree *last_mt;
1770 if (num_samples == 0)
1771 last_mt = rb->mt;
1772 else
1773 last_mt = rb->singlesample_mt;
1774
1775 if (last_mt && last_mt->bo == buffer->bo) {
1776 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1777 intel_miptree_make_shareable(intel, last_mt);
1778 }
1779 return;
1780 }
1781
1782 /* Only allow internal compression if samples == 0. For multisampled
1783 * window system buffers, the only thing the single-sampled buffer is used
1784 * for is as a resolve target. If we do any compression beyond what is
1785 * supported by the window system, we will just have to resolve so it's
1786 * probably better to just not bother.
1787 */
1788 const bool allow_internal_aux = (num_samples == 0);
1789
1790 struct intel_mipmap_tree *mt =
1791 intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1792 intel_rb_format(rb),
1793 allow_internal_aux);
1794 if (!mt)
1795 return;
1796
1797 if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1798 buffer->width, buffer->height,
1799 buffer->pitch)) {
1800 intel_miptree_release(&mt);
1801 return;
1802 }
1803
1804 if (_mesa_is_front_buffer_drawing(fb) &&
1805 buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1806 rb->Base.Base.NumSamples > 1) {
1807 intel_renderbuffer_upsample(intel, rb);
1808 }
1809
1810 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1811 /* The compositor and the application may access this image
1812 * concurrently. The display hardware may even scanout the image while
1813 * the GPU is rendering to it. Aux surfaces cause difficulty with
1814 * concurrent access, so permanently disable aux for this miptree.
1815 *
1816 * Perhaps we could improve overall application performance by
1817 * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1818 * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1819 * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1820 * approach to be highly dependent on the application's GL usage.
1821 *
1822 * I [chadv] expect clever disabling/reenabling to be counterproductive
1823 * in the use cases I care about: applications that render nearly
1824       * realtime handwriting to the surface while possibly undergoing
1825       * simultaneous scanout as a display plane. The app requires low
1826 * render latency. Even though the app spends most of its time in
1827 * shared-buffer mode, it also frequently transitions between
1828 * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1829       * mode. Visual stutter during the transitions should be avoided.
1830 *
1831 * In this case, I [chadv] believe reducing the GPU workload at
1832 * shared-buffer/double-buffer transitions would offer a smoother app
1833 * experience than any savings due to aux compression. But I've
1834 * collected no data to prove my theory.
1835 */
1836 intel_miptree_make_shareable(intel, mt);
1837 }
1838 }
1839
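/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * Examine the renderbuffers attached to the drawable's framebuffer to decide
 * which buffers to request, ask the image loader for them, and hand each
 * returned image to intel_update_image_buffer().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \see intel_update_renderbuffers()
 */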
1840 static void
1841 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1842 {
1843 struct gl_framebuffer *fb = drawable->driverPrivate;
1844 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1845 struct intel_renderbuffer *front_rb;
1846 struct intel_renderbuffer *back_rb;
1847 struct __DRIimageList images;
1848 mesa_format format;
1849 uint32_t buffer_mask = 0;
1850 int ret;
1851
1852 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1853 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1854
1855 if (back_rb)
1856 format = intel_rb_format(back_rb);
1857 else if (front_rb)
1858 format = intel_rb_format(front_rb);
1859 else
1860 return;
1861
1862 if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1863 _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1864 buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1865 }
1866
1867 if (back_rb)
1868 buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1869
1870 ret = dri_screen->image.loader->getBuffers(drawable,
1871 driGLFormatToImageFormat(format),
1872 &drawable->dri2.stamp,
1873 drawable->loaderPrivate,
1874 buffer_mask,
1875 &images);
1876 if (!ret)
1877 return;
1878
1879 if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1880 drawable->w = images.front->width;
1881 drawable->h = images.front->height;
1882 intel_update_image_buffer(brw,
1883 drawable,
1884 front_rb,
1885 images.front,
1886 __DRI_IMAGE_BUFFER_FRONT);
1887 }
1888
1889 if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1890 drawable->w = images.back->width;
1891 drawable->h = images.back->height;
1892 intel_update_image_buffer(brw,
1893 drawable,
1894 back_rb,
1895 images.back,
1896 __DRI_IMAGE_BUFFER_BACK);
1897 }
1898
1899 if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1900 assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
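      /* In shared-buffer (single-buffered) mode the loader returns the shared
       * image in the back slot, and it is the only image in the list.
       */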
1901 drawable->w = images.back->width;
1902 drawable->h = images.back->height;
1903 intel_update_image_buffer(brw,
1904 drawable,
1905 back_rb,
1906 images.back,
1907 __DRI_IMAGE_BUFFER_SHARED);
1908 brw->is_shared_buffer_bound = true;
1909 } else {
1910 brw->is_shared_buffer_bound = false;
1911 brw->is_shared_buffer_dirty = false;
1912 }
1913 }