1 /*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "main/imports.h"
41 #include "main/macros.h"
42 #include "main/points.h"
43 #include "main/version.h"
44 #include "main/vtxfmt.h"
45 #include "main/texobj.h"
46 #include "main/framebuffer.h"
47 #include "main/stencil.h"
48 #include "main/state.h"
49 #include "main/spirv_extensions.h"
50
51 #include "vbo/vbo.h"
52
53 #include "drivers/common/driverfuncs.h"
54 #include "drivers/common/meta.h"
55 #include "utils.h"
56
57 #include "brw_context.h"
58 #include "brw_defines.h"
59 #include "brw_blorp.h"
60 #include "brw_draw.h"
61 #include "brw_state.h"
62
63 #include "intel_batchbuffer.h"
64 #include "intel_buffer_objects.h"
65 #include "intel_buffers.h"
66 #include "intel_fbo.h"
67 #include "intel_mipmap_tree.h"
68 #include "intel_pixel.h"
69 #include "intel_image.h"
70 #include "intel_tex.h"
71 #include "intel_tex_obj.h"
72
73 #include "swrast_setup/swrast_setup.h"
74 #include "tnl/tnl.h"
75 #include "tnl/t_pipeline.h"
76 #include "util/ralloc.h"
77 #include "util/debug.h"
78 #include "util/disk_cache.h"
79 #include "isl/isl.h"
80
81 #include "common/gen_defines.h"
82
83 #include "compiler/spirv/nir_spirv.h"
84 /***************************************
85 * Mesa's Driver Functions
86 ***************************************/
87
88 const char *const brw_vendor_string = "Intel Open Source Technology Center";
89
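/**
 * Map Braswell's fused EU count to the marketing model number. The
 * three-character result is copied over the "XXX" placeholder in the
 * renderer string below.
 */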
90 static const char *
91 get_bsw_model(const struct intel_screen *screen)
92 {
93 switch (screen->eu_total) {
94 case 16:
95 return "405";
96 case 12:
97 return "400";
98 default:
99       return "   ";
100 }
101 }
102
103 const char *
104 brw_get_renderer_string(const struct intel_screen *screen)
105 {
106 const char *chipset;
107 static char buffer[128];
108 char *bsw = NULL;
109
110 switch (screen->deviceID) {
111 #undef CHIPSET
112 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
113 #include "pci_ids/i965_pci_ids.h"
114 default:
115 chipset = "Unknown Intel Chipset";
116 break;
117 }
118
119 /* Braswell branding is funny, so we have to fix it up here */
120 if (screen->deviceID == 0x22B1) {
121 bsw = strdup(chipset);
122 char *needle = strstr(bsw, "XXX");
123 if (needle) {
124 memcpy(needle, get_bsw_model(screen), 3);
125 chipset = bsw;
126 }
127 }
128
129 (void) driGetRendererString(buffer, chipset, 0);
130 free(bsw);
131 return buffer;
132 }
133
134 static const GLubyte *
135 intel_get_string(struct gl_context * ctx, GLenum name)
136 {
137 const struct brw_context *const brw = brw_context(ctx);
138
139 switch (name) {
140 case GL_VENDOR:
141 return (GLubyte *) brw_vendor_string;
142
143 case GL_RENDERER:
144 return
145 (GLubyte *) brw_get_renderer_string(brw->screen);
146
147 default:
148 return NULL;
149 }
150 }
151
152 static void
153 brw_set_background_context(struct gl_context *ctx,
154 struct util_queue_monitoring *queue_info)
155 {
156 struct brw_context *brw = brw_context(ctx);
157 __DRIcontext *driContext = brw->driContext;
158 __DRIscreen *driScreen = driContext->driScreenPriv;
159 const __DRIbackgroundCallableExtension *backgroundCallable =
160 driScreen->dri2.backgroundCallable;
161
162 /* Note: Mesa will only call this function if we've called
163 * _mesa_enable_multithreading(). We only do that if the loader exposed
164 * the __DRI_BACKGROUND_CALLABLE extension. So we know that
165 * backgroundCallable is not NULL.
166 */
167 backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
168 }
169
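/**
 * Viewport hook installed when the loader cannot deliver DRI2 invalidate
 * events (see brw_init_driver_functions): treat a viewport change on a
 * window-system framebuffer as a possible resize and invalidate the
 * drawables so fresh buffers are queried.
 */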
170 static void
171 intel_viewport(struct gl_context *ctx)
172 {
173 struct brw_context *brw = brw_context(ctx);
174 __DRIcontext *driContext = brw->driContext;
175
176 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
177 if (driContext->driDrawablePriv)
178 dri2InvalidateDrawable(driContext->driDrawablePriv);
179 if (driContext->driReadablePriv)
180 dri2InvalidateDrawable(driContext->driReadablePriv);
181 }
182 }
183
184 static void
185 intel_update_framebuffer(struct gl_context *ctx,
186 struct gl_framebuffer *fb)
187 {
188 struct brw_context *brw = brw_context(ctx);
189
190 /* Quantize the derived default number of samples
191 */
192 fb->DefaultGeometry._NumSamples =
193 intel_quantize_num_samples(brw->screen,
194 fb->DefaultGeometry.NumSamples);
195 }
196
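/**
 * Accumulate Mesa's _NEW_* dirty bits into brw->NewGLState and cache a few
 * derived values (stencil enables, polygon front bit, default framebuffer
 * sample counts) that the state upload code reads frequently.
 */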
197 static void
198 intel_update_state(struct gl_context * ctx)
199 {
200 GLuint new_state = ctx->NewState;
201 struct brw_context *brw = brw_context(ctx);
202
203 if (ctx->swrast_context)
204 _swrast_InvalidateState(ctx, new_state);
205
206 brw->NewGLState |= new_state;
207
208 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
209 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
210
211 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
212 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
213 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
214 brw->stencil_write_enabled =
215 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
216 }
217
218 if (new_state & _NEW_POLYGON)
219 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
220
221 if (new_state & _NEW_BUFFERS) {
222 intel_update_framebuffer(ctx, ctx->DrawBuffer);
223 if (ctx->DrawBuffer != ctx->ReadBuffer)
224 intel_update_framebuffer(ctx, ctx->ReadBuffer);
225 }
226 }
227
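/* Pick whichever loader is in use: the image loader's flushFrontBuffer hook
 * when present, otherwise the DRI2 loader's.
 */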
228 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
229
230 static void
231 intel_flush_front(struct gl_context *ctx)
232 {
233 struct brw_context *brw = brw_context(ctx);
234 __DRIcontext *driContext = brw->driContext;
235 __DRIdrawable *driDrawable = driContext->driDrawablePriv;
236 __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
237
238 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
239 if (flushFront(dri_screen) && driDrawable &&
240 driDrawable->loaderPrivate) {
241
242 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
243 *
244          * This potentially resolves both the front and back buffers. It
245          * is unnecessary to resolve the back buffer, but doing so harms
246          * nothing except performance. And no one cares about front-buffer
247          * rendering performance.
248 */
249 intel_resolve_for_dri2_flush(brw, driDrawable);
250 intel_batchbuffer_flush(brw);
251
252 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
253
254 /* We set the dirty bit in intel_prepare_render() if we're
255 * front buffer rendering once we get there.
256 */
257 brw->front_buffer_dirty = false;
258 }
259 }
260 }
261
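/**
 * For EGL mutable-render-buffer (shared buffer) surfaces: if the bound
 * shared buffer has been rendered to since the last flush, tell the loader
 * to display it, passing along a fence fd when the kernel supports
 * execution fences.
 */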
262 static void
263 brw_display_shared_buffer(struct brw_context *brw)
264 {
265 __DRIcontext *dri_context = brw->driContext;
266 __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
267 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
268 int fence_fd = -1;
269
270 if (!brw->is_shared_buffer_bound)
271 return;
272
273 if (!brw->is_shared_buffer_dirty)
274 return;
275
276 if (brw->screen->has_exec_fence) {
277 /* This function is always called during a flush operation, so there is
278 * no need to flush again here. But we want to provide a fence_fd to the
279 * loader, and a redundant flush is the easiest way to acquire one.
280 */
281 if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
282 return;
283 }
284
285 dri_screen->mutableRenderBuffer.loader
286 ->displaySharedBuffer(dri_drawable, fence_fd,
287 dri_drawable->loaderPrivate);
288 brw->is_shared_buffer_dirty = false;
289 }
290
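/**
 * glFlush entry point: submit the current batch, copy any fake-front
 * rendering to the real front buffer, display a dirty shared buffer, and
 * set the flush-throttle flag for the next draw.
 */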
291 static void
292 intel_glFlush(struct gl_context *ctx)
293 {
294 struct brw_context *brw = brw_context(ctx);
295
296 intel_batchbuffer_flush(brw);
297 intel_flush_front(ctx);
298 brw_display_shared_buffer(brw);
299 brw->need_flush_throttle = true;
300 }
301
302 static void
303 intel_finish(struct gl_context * ctx)
304 {
305 struct brw_context *brw = brw_context(ctx);
306
307 intel_glFlush(ctx);
308
309 if (brw->batch.last_bo)
310 brw_bo_wait_rendering(brw->batch.last_bo);
311 }
312
313 static void
314 brw_init_driver_functions(struct brw_context *brw,
315 struct dd_function_table *functions)
316 {
317 const struct gen_device_info *devinfo = &brw->screen->devinfo;
318
319 _mesa_init_driver_functions(functions);
320
321 /* GLX uses DRI2 invalidate events to handle window resizing.
322 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
323 * which doesn't provide a mechanism for snooping the event queues.
324 *
325 * So EGL still relies on viewport hacks to handle window resizing.
326 * This should go away with DRI3000.
327 */
328 if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
329 functions->Viewport = intel_viewport;
330
331 functions->Flush = intel_glFlush;
332 functions->Finish = intel_finish;
333 functions->GetString = intel_get_string;
334 functions->UpdateState = intel_update_state;
335
336 brw_init_draw_functions(functions);
337 intelInitTextureFuncs(functions);
338 intelInitTextureImageFuncs(functions);
339 intelInitTextureCopyImageFuncs(functions);
340 intelInitCopyImageFuncs(functions);
341 intelInitClearFuncs(functions);
342 intelInitBufferFuncs(functions);
343 intelInitPixelFuncs(functions);
344 intelInitBufferObjectFuncs(functions);
345 brw_init_syncobj_functions(functions);
346 brw_init_object_purgeable_functions(functions);
347
348 brwInitFragProgFuncs( functions );
349 brw_init_common_queryobj_functions(functions);
350 if (devinfo->gen >= 8 || devinfo->is_haswell)
351 hsw_init_queryobj_functions(functions);
352 else if (devinfo->gen >= 6)
353 gen6_init_queryobj_functions(functions);
354 else
355 gen4_init_queryobj_functions(functions);
356 brw_init_compute_functions(functions);
357 brw_init_conditional_render_functions(functions);
358
359 functions->GenerateMipmap = brw_generate_mipmap;
360
361 functions->QueryInternalFormat = brw_query_internal_format;
362
363 functions->NewTransformFeedback = brw_new_transform_feedback;
364 functions->DeleteTransformFeedback = brw_delete_transform_feedback;
365 if (can_do_mi_math_and_lrr(brw->screen)) {
366 functions->BeginTransformFeedback = hsw_begin_transform_feedback;
367 functions->EndTransformFeedback = hsw_end_transform_feedback;
368 functions->PauseTransformFeedback = hsw_pause_transform_feedback;
369 functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
370 } else if (devinfo->gen >= 7) {
371 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
372 functions->EndTransformFeedback = gen7_end_transform_feedback;
373 functions->PauseTransformFeedback = gen7_pause_transform_feedback;
374 functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
375 functions->GetTransformFeedbackVertexCount =
376 brw_get_transform_feedback_vertex_count;
377 } else {
378 functions->BeginTransformFeedback = brw_begin_transform_feedback;
379 functions->EndTransformFeedback = brw_end_transform_feedback;
380 functions->PauseTransformFeedback = brw_pause_transform_feedback;
381 functions->ResumeTransformFeedback = brw_resume_transform_feedback;
382 functions->GetTransformFeedbackVertexCount =
383 brw_get_transform_feedback_vertex_count;
384 }
385
386 if (devinfo->gen >= 6)
387 functions->GetSamplePosition = gen6_get_sample_position;
388
389 /* GL_ARB_get_program_binary */
390 brw_program_binary_init(brw->screen->deviceID);
391 functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
392 functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
393 functions->ProgramBinaryDeserializeDriverBlob =
394 brw_deserialize_program_binary;
395
396 if (brw->screen->disk_cache) {
397 functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
398 }
399
400 functions->SetBackgroundContext = brw_set_background_context;
401 }
402
403 static void
404 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
405 {
406 const struct gen_device_info *devinfo = &brw->screen->devinfo;
407 struct gl_context *ctx = &brw->ctx;
408
409 /* The following SPIR-V capabilities are only supported on gen7+. In theory
410 * you should enable the extension only on gen7+, but just in case let's
411 * assert it.
412 */
413 assert(devinfo->gen >= 7);
414
415 ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
416 ctx->Const.SpirVCapabilities.draw_parameters = true;
417 ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
418 ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
419 ctx->Const.SpirVCapabilities.image_write_without_format = true;
420 ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
421 ctx->Const.SpirVCapabilities.tessellation = true;
422 ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
423 ctx->Const.SpirVCapabilities.variable_pointers = true;
424 }
425
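/**
 * Fill in the ctx->Const limits (texture sizes, varyings, UBO/SSBO
 * bindings, MSAA modes, viewport ranges, ...) based on the hardware
 * generation and compiler capabilities.
 */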
426 static void
427 brw_initialize_context_constants(struct brw_context *brw)
428 {
429 const struct gen_device_info *devinfo = &brw->screen->devinfo;
430 struct gl_context *ctx = &brw->ctx;
431 const struct brw_compiler *compiler = brw->screen->compiler;
432
433 const bool stage_exists[MESA_SHADER_STAGES] = {
434 [MESA_SHADER_VERTEX] = true,
435 [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
436 [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
437 [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
438 [MESA_SHADER_FRAGMENT] = true,
439 [MESA_SHADER_COMPUTE] =
440 (_mesa_is_desktop_gl(ctx) &&
441 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
442 (ctx->API == API_OPENGLES2 &&
443 ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
444 };
445
446 unsigned num_stages = 0;
447 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
448 if (stage_exists[i])
449 num_stages++;
450 }
451
452 unsigned max_samplers =
453 devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;
454
455 ctx->Const.MaxDualSourceDrawBuffers = 1;
456 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
457 ctx->Const.MaxCombinedShaderOutputResources =
458 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
459
460 /* The timestamp register we can read for glGetTimestamp() is
461 * sometimes only 32 bits, before scaling to nanoseconds (depending
462 * on kernel).
463 *
464 * Once scaled to nanoseconds the timestamp would roll over at a
465 * non-power-of-two, so an application couldn't use
466 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
467 * report 36 bits and truncate at that (rolling over 5 times as
468 * often as the HW counter), and when the 32-bit counter rolls
469 * over, it happens to also be at a rollover in the reported value
470 * from near (1<<36) to 0.
471 *
472 * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
473 * rolls over every ~69 seconds.
474 */
475 ctx->Const.QueryCounterBits.Timestamp = 36;
476
477 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
478 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
479 if (devinfo->gen >= 7) {
480 ctx->Const.MaxRenderbufferSize = 16384;
481 ctx->Const.MaxTextureSize = 16384;
482 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
483 } else {
484 ctx->Const.MaxRenderbufferSize = 8192;
485 ctx->Const.MaxTextureSize = 8192;
486 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
487 }
488 ctx->Const.Max3DTextureLevels = 12; /* 2048 */
489 ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
490 ctx->Const.MaxTextureMbytes = 1536;
491 ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
492 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
493 ctx->Const.MaxTextureLodBias = 15.0;
494 ctx->Const.StripTextureBorder = true;
495 if (devinfo->gen >= 7) {
496 ctx->Const.MaxProgramTextureGatherComponents = 4;
497 ctx->Const.MinProgramTextureGatherOffset = -32;
498 ctx->Const.MaxProgramTextureGatherOffset = 31;
499 } else if (devinfo->gen == 6) {
500 ctx->Const.MaxProgramTextureGatherComponents = 1;
501 ctx->Const.MinProgramTextureGatherOffset = -8;
502 ctx->Const.MaxProgramTextureGatherOffset = 7;
503 }
504
505 ctx->Const.MaxUniformBlockSize = 65536;
506
507 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
508 struct gl_program_constants *prog = &ctx->Const.Program[i];
509
510 if (!stage_exists[i])
511 continue;
512
513 prog->MaxTextureImageUnits = max_samplers;
514
515 prog->MaxUniformBlocks = BRW_MAX_UBO;
516 prog->MaxCombinedUniformComponents =
517 prog->MaxUniformComponents +
518 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
519
520 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
521 prog->MaxAtomicBuffers = BRW_MAX_ABO;
522 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
523 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
524 }
525
526 ctx->Const.MaxTextureUnits =
527 MIN2(ctx->Const.MaxTextureCoordUnits,
528 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
529
530 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
531 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
532 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
533 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
534 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
535 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
536 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
537
538
539 /* Hardware only supports a limited number of transform feedback buffers.
540 * So we need to override the Mesa default (which is based only on software
541 * limits).
542 */
543 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
544
545 /* On Gen6, in the worst case, we use up one binding table entry per
546 * transform feedback component (see comments above the definition of
547 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
548 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
549 * BRW_MAX_SOL_BINDINGS.
550 *
551 * In "separate components" mode, we need to divide this value by
552 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
553 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
554 */
555 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
556 ctx->Const.MaxTransformFeedbackSeparateComponents =
557 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
558
559 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
560 !can_do_mi_math_and_lrr(brw->screen);
561
562 int max_samples;
563 const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
564 const int clamp_max_samples =
565 driQueryOptioni(&brw->optionCache, "clamp_max_samples");
566
567 if (clamp_max_samples < 0) {
568 max_samples = msaa_modes[0];
569 } else {
570 /* Select the largest supported MSAA mode that does not exceed
571 * clamp_max_samples.
572 */
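      /* The mode list from intel_supported_msaa_modes() is assumed to be
       * zero-terminated and sorted from largest to smallest, so the first
       * entry within the clamp is the largest usable mode.
       */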
573 max_samples = 0;
574 for (int i = 0; msaa_modes[i] != 0; ++i) {
575 if (msaa_modes[i] <= clamp_max_samples) {
576 max_samples = msaa_modes[i];
577 break;
578 }
579 }
580 }
581
582 ctx->Const.MaxSamples = max_samples;
583 ctx->Const.MaxColorTextureSamples = max_samples;
584 ctx->Const.MaxDepthTextureSamples = max_samples;
585 ctx->Const.MaxIntegerSamples = max_samples;
586 ctx->Const.MaxImageSamples = 0;
587
588 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
589 * to map indices of rectangular grid to sample numbers within a pixel.
590 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
591 * extension implementation. For more details see the comment above
592 * gen6_set_sample_maps() definition.
593 */
594 gen6_set_sample_maps(ctx);
595
596 ctx->Const.MinLineWidth = 1.0;
597 ctx->Const.MinLineWidthAA = 1.0;
598 if (devinfo->gen >= 6) {
599 ctx->Const.MaxLineWidth = 7.375;
600 ctx->Const.MaxLineWidthAA = 7.375;
601 ctx->Const.LineWidthGranularity = 0.125;
602 } else {
603 ctx->Const.MaxLineWidth = 7.0;
604 ctx->Const.MaxLineWidthAA = 7.0;
605 ctx->Const.LineWidthGranularity = 0.5;
606 }
607
608 /* For non-antialiased lines, we have to round the line width to the
609 * nearest whole number. Make sure that we don't advertise a line
610 * width that, when rounded, will be beyond the actual hardware
611 * maximum.
612 */
613 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
614
615 ctx->Const.MinPointSize = 1.0;
616 ctx->Const.MinPointSizeAA = 1.0;
617 ctx->Const.MaxPointSize = 255.0;
618 ctx->Const.MaxPointSizeAA = 255.0;
619 ctx->Const.PointSizeGranularity = 1.0;
620
621 if (devinfo->gen >= 5 || devinfo->is_g4x)
622 ctx->Const.MaxClipPlanes = 8;
623
624 ctx->Const.GLSLTessLevelsAsInputs = true;
625 ctx->Const.PrimitiveRestartForPatches = true;
626
627 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
628 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
629 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
630 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
631 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
632 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
633 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
634 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
635 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
636 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
637 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
638 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
639 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
640 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
641
642 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
643 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
644 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
645 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
646 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
647 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
648 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
649 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
650 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
651 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
652 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
653
654 /* Fragment shaders use real, 32-bit twos-complement integers for all
655 * integer types.
656 */
657 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
658 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
659 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
660 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
661 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
662
663 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
664 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
665 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
666 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
667 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
668
669    /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
670     * but we're not sure how it actually handles vertex order, which
671     * affects the provoking vertex decision. Always use the last-vertex
672     * convention for quad primitives, which works as expected for now.
673 */
674 if (devinfo->gen >= 6)
675 ctx->Const.QuadsFollowProvokingVertexConvention = false;
676
677 ctx->Const.NativeIntegers = true;
678
679 /* Regarding the CMP instruction, the Ivybridge PRM says:
680 *
681 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
682 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
683 * 0xFFFFFFFF) is assigned to dst."
684 *
685 * but PRMs for earlier generations say
686 *
687 * "In dword format, one GRF may store up to 8 results. When the register
688 * is used later as a vector of Booleans, as only LSB at each channel
689 * contains meaning [sic] data, software should make sure all higher bits
690 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
691 *
692 * We select the representation of a true boolean uniform to be ~0, and fix
693     * the results of Gen <= 5 CMP instructions with -(result & 1).
694 */
695 ctx->Const.UniformBooleanTrue = ~0;
696
697 /* From the gen4 PRM, volume 4 page 127:
698 *
699 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
700 * the base address of the first element of the surface, computed in
701 * software by adding the surface base address to the byte offset of
702 * the element in the buffer."
703 *
704 * However, unaligned accesses are slower, so enforce buffer alignment.
705 *
706 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
707 * restriction: the start of the buffer needs to be 32B aligned.
708 */
709 ctx->Const.UniformBufferOffsetAlignment = 32;
710
711 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
712 * that we can safely have the CPU and GPU writing the same SSBO on
713     * non-cache-coherent systems (our Atom CPUs). With UBOs, the GPU never
714 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
715 * be updating disjoint regions of the buffer simultaneously and that will
716 * break if the regions overlap the same cacheline.
717 */
718 ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
719 ctx->Const.TextureBufferOffsetAlignment = 16;
720 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
721
722 if (devinfo->gen >= 6) {
723 ctx->Const.MaxVarying = 32;
724 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
725 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
726 compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
727 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
728 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
729 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
730 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
731 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
732 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
733 }
734
735 /* We want the GLSL compiler to emit code that uses condition codes */
736 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
737 ctx->Const.ShaderCompilerOptions[i] =
738 brw->screen->compiler->glsl_compiler_options[i];
739 }
740
741 if (devinfo->gen >= 7) {
742 ctx->Const.MaxViewportWidth = 32768;
743 ctx->Const.MaxViewportHeight = 32768;
744 }
745
746 /* ARB_viewport_array, OES_viewport_array */
747 if (devinfo->gen >= 6) {
748 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
749 ctx->Const.ViewportSubpixelBits = 8;
750
751 /* Cast to float before negating because MaxViewportWidth is unsigned.
752 */
753 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
754 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
755 }
756
757 /* ARB_gpu_shader5 */
758 if (devinfo->gen >= 7)
759 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
760
761 /* ARB_framebuffer_no_attachments */
762 ctx->Const.MaxFramebufferWidth = 16384;
763 ctx->Const.MaxFramebufferHeight = 16384;
764 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
765 ctx->Const.MaxFramebufferSamples = max_samples;
766
767 /* OES_primitive_bounding_box */
768 ctx->Const.NoPrimitiveBoundingBoxOutput = true;
769
770 /* TODO: We should be able to use STD430 packing by default on all hardware
771 * but some piglit tests [1] currently fail on SNB when this is enabled.
772     * The problem is that the message we use for uniform pulls in the
773     * vec4 back-end on SNB is the OWORD block load instruction, which
774 * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the
775 * sampler which doesn't have these restrictions.
776 *
777 * In the scalar back-end, we use the sampler for dynamic uniform loads and
778 * pull an entire cache line at a time for constant offset loads both of
779 * which support almost any alignment.
780 *
781 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
782 */
783 if (devinfo->gen >= 7)
784 ctx->Const.UseSTD430AsDefaultPacking = true;
785
786 if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
787 ctx->Const.AllowMappedBuffersDuringExecution = true;
788
789 /* GL_ARB_get_program_binary */
790 ctx->Const.NumProgramBinaryFormats = 1;
791 }
792
793 static void
794 brw_initialize_cs_context_constants(struct brw_context *brw)
795 {
796 struct gl_context *ctx = &brw->ctx;
797 const struct intel_screen *screen = brw->screen;
798 struct gen_device_info *devinfo = &brw->screen->devinfo;
799
800 /* FINISHME: Do this for all platforms that the kernel supports */
801 if (devinfo->is_cherryview &&
802 screen->subslice_total > 0 && screen->eu_total > 0) {
803 /* Logical CS threads = EUs per subslice * 7 threads per EU */
804 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
805
806 /* Fuse configurations may give more threads than expected, never less. */
807 if (max_cs_threads > devinfo->max_cs_threads)
808 devinfo->max_cs_threads = max_cs_threads;
809 }
810
811 /* Maximum number of scalar compute shader invocations that can be run in
812 * parallel in the same subslice assuming SIMD32 dispatch.
813 *
814 * We don't advertise more than 64 threads, because we are limited to 64 by
815 * our usage of thread_width_max in the gpgpu walker command. This only
816 * currently impacts Haswell, which otherwise might be able to advertise 70
817     * threads. With SIMD32 and 64 threads, Haswell still provides twice the
818     * number of invocations required by ARB_compute_shader.
819 */
820 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
821 const uint32_t max_invocations = 32 * max_threads;
822 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
823 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
824 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
825 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
826 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
827 }
828
829 /**
830 * Process driconf (drirc) options, setting appropriate context flags.
831 *
832 * intelInitExtensions still pokes at optionCache directly, in order to
833 * avoid advertising various extensions. No flags are set, so it makes
834 * sense to continue doing that there.
835 */
836 static void
837 brw_process_driconf_options(struct brw_context *brw)
838 {
839 const struct gen_device_info *devinfo = &brw->screen->devinfo;
840 struct gl_context *ctx = &brw->ctx;
841
842 driOptionCache *options = &brw->optionCache;
843 driParseConfigFiles(options, &brw->screen->optionCache,
844 brw->driContext->driScreenPriv->myNum,
845 "i965", NULL);
846
847 int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
848 switch (bo_reuse_mode) {
849 case DRI_CONF_BO_REUSE_DISABLED:
850 break;
851 case DRI_CONF_BO_REUSE_ALL:
852 brw_bufmgr_enable_reuse(brw->bufmgr);
853 break;
854 }
855
856 if (INTEL_DEBUG & DEBUG_NO_HIZ) {
857 brw->has_hiz = false;
858 /* On gen6, you can only do separate stencil with HIZ. */
859 if (devinfo->gen == 6)
860 brw->has_separate_stencil = false;
861 }
862
863 if (driQueryOptionb(options, "mesa_no_error"))
864 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
865
866 if (driQueryOptionb(options, "always_flush_batch")) {
867 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
868 brw->always_flush_batch = true;
869 }
870
871 if (driQueryOptionb(options, "always_flush_cache")) {
872 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
873 brw->always_flush_cache = true;
874 }
875
876 if (driQueryOptionb(options, "disable_throttling")) {
877 fprintf(stderr, "disabling flush throttling\n");
878 brw->disable_throttling = true;
879 }
880
881 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
882
883 if (driQueryOptionb(&brw->optionCache, "precise_trig"))
884 brw->screen->compiler->precise_trig = true;
885
886 ctx->Const.ForceGLSLExtensionsWarn =
887 driQueryOptionb(options, "force_glsl_extensions_warn");
888
889 ctx->Const.ForceGLSLVersion =
890 driQueryOptioni(options, "force_glsl_version");
891
892 ctx->Const.DisableGLSLLineContinuations =
893 driQueryOptionb(options, "disable_glsl_line_continuations");
894
895 ctx->Const.AllowGLSLExtensionDirectiveMidShader =
896 driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
897
898 ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
899 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
900
901 ctx->Const.AllowHigherCompatVersion =
902 driQueryOptionb(options, "allow_higher_compat_version");
903
904 ctx->Const.ForceGLSLAbsSqrt =
905 driQueryOptionb(options, "force_glsl_abs_sqrt");
906
907 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
908
909 brw->dual_color_blend_by_location =
910 driQueryOptionb(options, "dual_color_blend_by_location");
911
912 ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
913 driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
914
915 ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
916 driComputeOptionsSha1(&brw->screen->optionCache,
917 ctx->Const.dri_config_options_sha1);
918 }
919
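/**
 * Driver entry point for context creation: validate the requested flags and
 * attributes, allocate the brw_context, initialize core Mesa, the driver
 * function table, context constants, the kernel hardware context and its
 * priority, the state atoms, and the extension list.
 */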
920 GLboolean
921 brwCreateContext(gl_api api,
922 const struct gl_config *mesaVis,
923 __DRIcontext *driContextPriv,
924 const struct __DriverContextConfig *ctx_config,
925 unsigned *dri_ctx_error,
926 void *sharedContextPrivate)
927 {
928 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
929 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
930 const struct gen_device_info *devinfo = &screen->devinfo;
931 struct dd_function_table functions;
932
933 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
934 * provides us with context reset notifications.
935 */
936 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
937 __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
938 __DRI_CTX_FLAG_NO_ERROR;
939
940 if (screen->has_context_reset_notification)
941 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
942
943 if (ctx_config->flags & ~allowed_flags) {
944 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
945 return false;
946 }
947
948 if (ctx_config->attribute_mask &
949 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
950 __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
951 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
952 return false;
953 }
954
955 bool notify_reset =
956 ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
957 ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
958
959 struct brw_context *brw = rzalloc(NULL, struct brw_context);
960 if (!brw) {
961 fprintf(stderr, "%s: failed to alloc context\n", __func__);
962 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
963 return false;
964 }
965
966 driContextPriv->driverPrivate = brw;
967 brw->driContext = driContextPriv;
968 brw->screen = screen;
969 brw->bufmgr = screen->bufmgr;
970
971 brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
972 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
973
974 brw->has_swizzling = screen->hw_has_swizzling;
975
976 brw->isl_dev = screen->isl_dev;
977
978 brw->vs.base.stage = MESA_SHADER_VERTEX;
979 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
980 brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
981 brw->gs.base.stage = MESA_SHADER_GEOMETRY;
982 brw->wm.base.stage = MESA_SHADER_FRAGMENT;
983 brw->cs.base.stage = MESA_SHADER_COMPUTE;
984
985 brw_init_driver_functions(brw, &functions);
986
987 if (notify_reset)
988 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
989
990 struct gl_context *ctx = &brw->ctx;
991
992 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
993 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
994 fprintf(stderr, "%s: failed to init mesa context\n", __func__);
995 intelDestroyContext(driContextPriv);
996 return false;
997 }
998
999 driContextSetFlags(ctx, ctx_config->flags);
1000
1001 /* Initialize the software rasterizer and helper modules.
1002 *
1003 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1004 * software fallbacks (which we have to support on legacy GL to do weird
1005 * glDrawPixels(), glBitmap(), and other functions).
1006 */
1007 if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1008 _swrast_CreateContext(ctx);
1009 }
1010
1011 _vbo_CreateContext(ctx);
1012 if (ctx->swrast_context) {
1013 _tnl_CreateContext(ctx);
1014 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1015 _swsetup_CreateContext(ctx);
1016
1017 /* Configure swrast to match hardware characteristics: */
1018 _swrast_allow_pixel_fog(ctx, false);
1019 _swrast_allow_vertex_fog(ctx, true);
1020 }
1021
1022 _mesa_meta_init(ctx);
1023
1024 brw_process_driconf_options(brw);
1025
1026 if (INTEL_DEBUG & DEBUG_PERF)
1027 brw->perf_debug = true;
1028
1029 brw_initialize_cs_context_constants(brw);
1030 brw_initialize_context_constants(brw);
1031
1032 ctx->Const.ResetStrategy = notify_reset
1033 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1034
1035 /* Reinitialize the context point state. It depends on ctx->Const values. */
1036 _mesa_init_point(ctx);
1037
1038 intel_fbo_init(brw);
1039
1040 intel_batchbuffer_init(brw);
1041
1042 /* Create a new hardware context. Using a hardware context means that
1043 * our GPU state will be saved/restored on context switch, allowing us
1044 * to assume that the GPU is in the same state we left it in.
1045 *
1046 * This is required for transform feedback buffer offsets, query objects,
1047 * and also allows us to reduce how much state we have to emit.
1048 */
1049 brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1050 if (!brw->hw_ctx && devinfo->gen >= 6) {
1051 fprintf(stderr, "Failed to create hardware context.\n");
1052 intelDestroyContext(driContextPriv);
1053 return false;
1054 }
1055
1056 if (brw->hw_ctx) {
1057 int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1058 if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1059 switch (ctx_config->priority) {
1060 case __DRI_CTX_PRIORITY_LOW:
1061 hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1062 break;
1063 case __DRI_CTX_PRIORITY_HIGH:
1064 hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1065 break;
1066 }
1067 }
1068 if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1069 brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1070 fprintf(stderr,
1071 "Failed to set priority [%d:%d] for hardware context.\n",
1072 ctx_config->priority, hw_priority);
1073 intelDestroyContext(driContextPriv);
1074 return false;
1075 }
1076 }
1077
1078 if (brw_init_pipe_control(brw, devinfo)) {
1079 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1080 intelDestroyContext(driContextPriv);
1081 return false;
1082 }
1083
1084 brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1085
1086 brw_init_state(brw);
1087
1088 intelInitExtensions(ctx);
1089
1090 brw_init_surface_formats(brw);
1091
1092 brw_blorp_init(brw);
1093
1094 brw->urb.size = devinfo->urb.size;
1095
1096 if (devinfo->gen == 6)
1097 brw->urb.gs_present = false;
1098
1099 brw->prim_restart.in_progress = false;
1100 brw->prim_restart.enable_cut_index = false;
1101 brw->gs.enabled = false;
1102 brw->clip.viewport_count = 1;
1103
1104 brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1105
1106 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1107
1108 ctx->VertexProgram._MaintainTnlProgram = true;
1109 ctx->FragmentProgram._MaintainTexEnvProgram = true;
1110
1111 brw_draw_init( brw );
1112
1113 if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1114 /* Turn on some extra GL_ARB_debug_output generation. */
1115 brw->perf_debug = true;
1116 }
1117
1118 if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1119 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1120 ctx->Const.RobustAccess = GL_TRUE;
1121 }
1122
1123 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1124 brw_init_shader_time(brw);
1125
1126 _mesa_override_extensions(ctx);
1127 _mesa_compute_version(ctx);
1128
1129 /* GL_ARB_gl_spirv */
1130 if (ctx->Extensions.ARB_gl_spirv) {
1131 brw_initialize_spirv_supported_capabilities(brw);
1132
1133 if (ctx->Extensions.ARB_spirv_extensions) {
1134 /* GL_ARB_spirv_extensions */
1135 ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
1136 _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
1137 &ctx->Const.SpirVCapabilities);
1138 }
1139 }
1140
1141 _mesa_initialize_dispatch_tables(ctx);
1142 _mesa_initialize_vbo_vtxfmt(ctx);
1143
1144 if (ctx->Extensions.INTEL_performance_query)
1145 brw_init_performance_queries(brw);
1146
1147 vbo_use_buffer_objects(ctx);
1148 vbo_always_unmap_buffers(ctx);
1149
1150 brw->ctx.Cache = brw->screen->disk_cache;
1151
1152 if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1153 driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1154 /* Loader supports multithreading, and so do we. */
1155 _mesa_glthread_init(ctx);
1156 }
1157
1158 return true;
1159 }
1160
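/**
 * Tear the context down in roughly the reverse order of creation:
 * shader-time reporting, blorp, state atoms, per-stage scratch and
 * push-constant BOs, the hardware context, the swrast/tnl/vbo helpers, the
 * batchbuffer, and finally the core Mesa context.
 */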
1161 void
1162 intelDestroyContext(__DRIcontext * driContextPriv)
1163 {
1164 struct brw_context *brw =
1165 (struct brw_context *) driContextPriv->driverPrivate;
1166 struct gl_context *ctx = &brw->ctx;
1167
1168 GET_CURRENT_CONTEXT(curctx);
1169
1170 if (curctx == NULL) {
1171 /* No current context, but we need one to release
1172 * renderbuffer surface when we release framebuffer.
1173 * So temporarily bind the context.
1174 */
1175 _mesa_make_current(ctx, NULL, NULL);
1176 }
1177
1178 _mesa_glthread_destroy(&brw->ctx);
1179
1180 _mesa_meta_free(&brw->ctx);
1181
1182 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1183 /* Force a report. */
1184 brw->shader_time.report_time = 0;
1185
1186 brw_collect_and_report_shader_time(brw);
1187 brw_destroy_shader_time(brw);
1188 }
1189
1190 blorp_finish(&brw->blorp);
1191
1192 brw_destroy_state(brw);
1193 brw_draw_destroy(brw);
1194
1195 brw_bo_unreference(brw->curbe.curbe_bo);
1196
1197 brw_bo_unreference(brw->vs.base.scratch_bo);
1198 brw_bo_unreference(brw->tcs.base.scratch_bo);
1199 brw_bo_unreference(brw->tes.base.scratch_bo);
1200 brw_bo_unreference(brw->gs.base.scratch_bo);
1201 brw_bo_unreference(brw->wm.base.scratch_bo);
1202
1203 brw_bo_unreference(brw->vs.base.push_const_bo);
1204 brw_bo_unreference(brw->tcs.base.push_const_bo);
1205 brw_bo_unreference(brw->tes.base.push_const_bo);
1206 brw_bo_unreference(brw->gs.base.push_const_bo);
1207 brw_bo_unreference(brw->wm.base.push_const_bo);
1208
1209 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1210
1211 if (ctx->swrast_context) {
1212 _swsetup_DestroyContext(&brw->ctx);
1213 _tnl_DestroyContext(&brw->ctx);
1214 }
1215 _vbo_DestroyContext(&brw->ctx);
1216
1217 if (ctx->swrast_context)
1218 _swrast_DestroyContext(&brw->ctx);
1219
1220 brw_fini_pipe_control(brw);
1221 intel_batchbuffer_free(&brw->batch);
1222
1223 brw_bo_unreference(brw->throttle_batch[1]);
1224 brw_bo_unreference(brw->throttle_batch[0]);
1225 brw->throttle_batch[1] = NULL;
1226 brw->throttle_batch[0] = NULL;
1227
1228 driDestroyOptionCache(&brw->optionCache);
1229
1230 /* free the Mesa context */
1231 _mesa_free_context_data(&brw->ctx, true);
1232
1233 ralloc_free(brw);
1234 driContextPriv->driverPrivate = NULL;
1235 }
1236
1237 GLboolean
1238 intelUnbindContext(__DRIcontext * driContextPriv)
1239 {
1240 GET_CURRENT_CONTEXT(ctx);
1241 _mesa_glthread_finish(ctx);
1242
1243    /* Unset current context and dispatch table */
1244 _mesa_make_current(NULL, NULL, NULL);
1245
1246 return true;
1247 }
1248
1249 /**
1250  * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1251 * on window system framebuffers.
1252 *
1253 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1254 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1255 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1256 * for a visual where you're guaranteed to be capable, but it turns out that
1257 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1258 * incapable ones, because there's no difference between the two in resources
1259 * used. Applications thus get built that accidentally rely on the default
1260 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1261 * great...
1262 *
1263 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1264 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1265 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1266 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1267 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1268 * and get no sRGB encode (assuming that both kinds of visual are available).
1269 * Thus our choice to support sRGB by default on our visuals for desktop would
1270 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1271 *
1272 * Unfortunately, renderbuffer setup happens before a context is created. So
1273 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1274 * context (without an sRGB visual), we go turn that back off before anyone
1275 * finds out.
1276 */
1277 static void
1278 intel_gles3_srgb_workaround(struct brw_context *brw,
1279 struct gl_framebuffer *fb)
1280 {
1281 struct gl_context *ctx = &brw->ctx;
1282
1283 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1284 return;
1285
1286 for (int i = 0; i < BUFFER_COUNT; i++) {
1287 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1288
1289 /* Check if sRGB was specifically asked for. */
1290 struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1291 if (irb && irb->need_srgb)
1292 return;
1293
1294 if (rb)
1295 rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1296 }
1297 /* Disable sRGB from framebuffers that are not compatible. */
1298 fb->Visual.sRGBCapable = false;
1299 }
1300
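/**
 * Bind the context together with its draw and read framebuffers (or unbind
 * everything when driContextPriv is NULL), applying the GLES sRGB
 * workaround and forcing an initial buffer query so the default viewport
 * has a size.
 */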
1301 GLboolean
1302 intelMakeCurrent(__DRIcontext * driContextPriv,
1303 __DRIdrawable * driDrawPriv,
1304 __DRIdrawable * driReadPriv)
1305 {
1306 struct brw_context *brw;
1307
1308 if (driContextPriv)
1309 brw = (struct brw_context *) driContextPriv->driverPrivate;
1310 else
1311 brw = NULL;
1312
1313 if (driContextPriv) {
1314 struct gl_context *ctx = &brw->ctx;
1315 struct gl_framebuffer *fb, *readFb;
1316
1317 if (driDrawPriv == NULL) {
1318 fb = _mesa_get_incomplete_framebuffer();
1319 } else {
1320 fb = driDrawPriv->driverPrivate;
1321 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1322 }
1323
1324 if (driReadPriv == NULL) {
1325 readFb = _mesa_get_incomplete_framebuffer();
1326 } else {
1327 readFb = driReadPriv->driverPrivate;
1328 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1329 }
1330
1331 /* The sRGB workaround changes the renderbuffer's format. We must change
1332        * the format before the renderbuffer's miptree gets allocated, otherwise
1333 * the formats of the renderbuffer and its miptree will differ.
1334 */
1335 intel_gles3_srgb_workaround(brw, fb);
1336 intel_gles3_srgb_workaround(brw, readFb);
1337
1338 /* If the context viewport hasn't been initialized, force a call out to
1339 * the loader to get buffers so we have a drawable size for the initial
1340 * viewport. */
1341 if (!brw->ctx.ViewportInitialized)
1342 intel_prepare_render(brw);
1343
1344 _mesa_make_current(ctx, fb, readFb);
1345 } else {
1346 GET_CURRENT_CONTEXT(ctx);
1347 _mesa_glthread_finish(ctx);
1348 _mesa_make_current(NULL, NULL, NULL);
1349 }
1350
1351 return true;
1352 }
1353
1354 void
1355 intel_resolve_for_dri2_flush(struct brw_context *brw,
1356 __DRIdrawable *drawable)
1357 {
1358 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1359
1360 if (devinfo->gen < 6) {
1361 /* MSAA and fast color clear are not supported, so don't waste time
1362 * checking whether a resolve is needed.
1363 */
1364 return;
1365 }
1366
1367 struct gl_framebuffer *fb = drawable->driverPrivate;
1368 struct intel_renderbuffer *rb;
1369
1370 /* Usually, only the back buffer will need to be downsampled. However,
1371 * the front buffer will also need it if the user has rendered into it.
1372 */
1373 static const gl_buffer_index buffers[2] = {
1374 BUFFER_BACK_LEFT,
1375 BUFFER_FRONT_LEFT,
1376 };
1377
1378 for (int i = 0; i < 2; ++i) {
1379 rb = intel_get_renderbuffer(fb, buffers[i]);
1380 if (rb == NULL || rb->mt == NULL)
1381 continue;
1382 if (rb->mt->surf.samples == 1) {
1383 assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1384 rb->layer_count == 1);
1385 intel_miptree_prepare_external(brw, rb->mt);
1386 } else {
1387 intel_renderbuffer_downsample(brw, rb);
1388
1389 /* Call prepare_external on the single-sample miptree to do any
1390 * needed resolves prior to handing it off to the window system.
1391 * This is needed in the case that rb->singlesample_mt is Y-tiled
1392 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
1393 * this case, the MSAA resolve above will write compressed data into
1394 * rb->singlesample_mt.
1395 *
1396 * TODO: Some day, if we decide to care about the tiny performance
1397 * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1398 * we could detect this case and just allocate the single-sampled
1399 * miptree without aux. However, that would be a lot of plumbing and
1400 * this is a rather exotic case so it's not really worth it.
1401 */
1402 intel_miptree_prepare_external(brw, rb->singlesample_mt);
1403 }
1404 }
1405 }
1406
1407 static unsigned
1408 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1409 {
1410 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1411 }
1412
1413 static void
1414 intel_query_dri2_buffers(struct brw_context *brw,
1415 __DRIdrawable *drawable,
1416 __DRIbuffer **buffers,
1417 int *count);
1418
1419 static void
1420 intel_process_dri2_buffer(struct brw_context *brw,
1421 __DRIdrawable *drawable,
1422 __DRIbuffer *buffer,
1423 struct intel_renderbuffer *rb,
1424 const char *buffer_name);
1425
1426 static void
1427 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1428
1429 static void
1430 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1431 {
1432 struct gl_framebuffer *fb = drawable->driverPrivate;
1433 struct intel_renderbuffer *rb;
1434 __DRIbuffer *buffers = NULL;
1435 int count;
1436 const char *region_name;
1437
1438 /* Set this up front, so that in case our buffers get invalidated
1439 * while we're getting new buffers, we don't clobber the stamp and
1440 * thus ignore the invalidate. */
1441 drawable->lastStamp = drawable->dri2.stamp;
1442
1443 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1444 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1445
1446 intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1447
1448 if (buffers == NULL)
1449 return;
1450
1451 for (int i = 0; i < count; i++) {
1452 switch (buffers[i].attachment) {
1453 case __DRI_BUFFER_FRONT_LEFT:
1454 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1455 region_name = "dri2 front buffer";
1456 break;
1457
1458 case __DRI_BUFFER_FAKE_FRONT_LEFT:
1459 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1460 region_name = "dri2 fake front buffer";
1461 break;
1462
1463 case __DRI_BUFFER_BACK_LEFT:
1464 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1465 region_name = "dri2 back buffer";
1466 break;
1467
1468 case __DRI_BUFFER_DEPTH:
1469 case __DRI_BUFFER_HIZ:
1470 case __DRI_BUFFER_DEPTH_STENCIL:
1471 case __DRI_BUFFER_STENCIL:
1472 case __DRI_BUFFER_ACCUM:
1473 default:
1474 fprintf(stderr,
1475 "unhandled buffer attach event, attachment type %d\n",
1476 buffers[i].attachment);
1477 return;
1478 }
1479
1480 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1481 }
1482
1483 }
1484
1485 void
1486 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1487 {
1488 struct brw_context *brw = context->driverPrivate;
1489 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1490
1491 /* Set this up front, so that in case our buffers get invalidated
1492 * while we're getting new buffers, we don't clobber the stamp and
1493 * thus ignore the invalidate. */
1494 drawable->lastStamp = drawable->dri2.stamp;
1495
1496 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1497 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1498
1499 if (dri_screen->image.loader)
1500 intel_update_image_buffers(brw, drawable);
1501 else
1502 intel_update_dri2_buffers(brw, drawable);
1503
1504 driUpdateFramebufferSize(&brw->ctx, drawable);
1505 }
1506
1507 /**
1508  * intel_prepare_render should be called anywhere that current read/drawbuffer
1509 * state is required.
1510 */
1511 void
1512 intel_prepare_render(struct brw_context *brw)
1513 {
1514 struct gl_context *ctx = &brw->ctx;
1515 __DRIcontext *driContext = brw->driContext;
1516 __DRIdrawable *drawable;
1517
1518 drawable = driContext->driDrawablePriv;
1519 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1520 if (drawable->lastStamp != drawable->dri2.stamp)
1521 intel_update_renderbuffers(driContext, drawable);
1522 driContext->dri2.draw_stamp = drawable->dri2.stamp;
1523 }
1524
1525 drawable = driContext->driReadablePriv;
1526 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1527 if (drawable->lastStamp != drawable->dri2.stamp)
1528 intel_update_renderbuffers(driContext, drawable);
1529 driContext->dri2.read_stamp = drawable->dri2.stamp;
1530 }
1531
1532 /* If we're currently rendering to the front buffer, the rendering
1533 * that will happen next will probably dirty the front buffer. So
1534 * mark it as dirty here.
1535 */
1536 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1537 brw->front_buffer_dirty = true;
1538
1539 if (brw->is_shared_buffer_bound) {
1540 /* Subsequent rendering will probably dirty the shared buffer. */
1541 brw->is_shared_buffer_dirty = true;
1542 }
1543 }
1544
1545 /**
1546 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1547 *
1548 * To determine which DRI buffers to request, examine the renderbuffers
1549 * attached to the drawable's framebuffer. Then request the buffers with
1550 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1551 *
1552 * This is called from intel_update_renderbuffers().
1553 *
1554 * \param drawable Drawable whose buffers are queried.
1555 * \param buffers [out] List of buffers returned by DRI2 query.
1556 * \param buffer_count [out] Number of buffers returned.
1557 *
1558 * \see intel_update_renderbuffers()
1559 * \see DRI2GetBuffers()
1560 * \see DRI2GetBuffersWithFormat()
1561 */
1562 static void
1563 intel_query_dri2_buffers(struct brw_context *brw,
1564 __DRIdrawable *drawable,
1565 __DRIbuffer **buffers,
1566 int *buffer_count)
1567 {
1568 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1569 struct gl_framebuffer *fb = drawable->driverPrivate;
1570 int i = 0;
1571 unsigned attachments[8];
1572
1573 struct intel_renderbuffer *front_rb;
1574 struct intel_renderbuffer *back_rb;
1575
1576 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1577 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1578
1579 memset(attachments, 0, sizeof(attachments));
1580 if ((_mesa_is_front_buffer_drawing(fb) ||
1581 _mesa_is_front_buffer_reading(fb) ||
1582 !back_rb) && front_rb) {
1583 /* If a fake front buffer is in use, then querying for
1584 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1585 * the real front buffer to the fake front buffer. So before doing the
1586 * query, we need to make sure all the pending drawing has landed in the
1587 * real front buffer.
1588 */
1589 intel_batchbuffer_flush(brw);
1590 intel_flush_front(&brw->ctx);
1591
1592 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1593 attachments[i++] = intel_bits_per_pixel(front_rb);
1594 } else if (front_rb && brw->front_buffer_dirty) {
1595 /* We have pending front buffer rendering, but we aren't querying for a
1596 * front buffer. If the front buffer we have is a fake front buffer,
1597 * the X server is going to throw it away when it processes the query.
1598 * So before doing the query, make sure all the pending drawing has
1599 * landed in the real front buffer.
1600 */
1601 intel_batchbuffer_flush(brw);
1602 intel_flush_front(&brw->ctx);
1603 }
1604
1605 if (back_rb) {
1606 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1607 attachments[i++] = intel_bits_per_pixel(back_rb);
1608 }
1609
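   /* attachments[] holds (attachment, bits-per-pixel) pairs, so the number
    * of attachments actually requested from the loader is i / 2.
    */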
1610 assert(i <= ARRAY_SIZE(attachments));
1611
1612 *buffers =
1613 dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1614 &drawable->w,
1615 &drawable->h,
1616 attachments, i / 2,
1617 buffer_count,
1618 drawable->loaderPrivate);
1619 }
1620
1621 /**
1622 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1623 *
1624 * This is called from intel_update_renderbuffers().
1625 *
1626 * \par Note:
1627 * DRI buffers whose attachment point is DRI2BufferStencil or
1628 * DRI2BufferDepthStencil are handled as special cases.
1629 *
1630  * \param buffer_name is a human-readable name, such as "dri2 front buffer",
1631 * that is passed to brw_bo_gem_create_from_name().
1632 *
1633 * \see intel_update_renderbuffers()
1634 */
1635 static void
1636 intel_process_dri2_buffer(struct brw_context *brw,
1637 __DRIdrawable *drawable,
1638 __DRIbuffer *buffer,
1639 struct intel_renderbuffer *rb,
1640 const char *buffer_name)
1641 {
1642 struct gl_framebuffer *fb = drawable->driverPrivate;
1643 struct brw_bo *bo;
1644
1645 if (!rb)
1646 return;
1647
1648 unsigned num_samples = rb->Base.Base.NumSamples;
1649
1650 /* We try to avoid closing and reopening the same BO name, because the first
1651 * use of a mapping of the buffer involves a bunch of page faulting which is
1652 * moderately expensive.
1653 */
1654 struct intel_mipmap_tree *last_mt;
1655 if (num_samples == 0)
1656 last_mt = rb->mt;
1657 else
1658 last_mt = rb->singlesample_mt;
1659
1660 uint32_t old_name = 0;
1661 if (last_mt) {
1662 /* The bo already has a name because the miptree was created by a
1663 * previous call to intel_process_dri2_buffer(). If a bo already has a
1664 * name, then brw_bo_flink() is a low-cost getter. It does not
1665 * create a new name.
1666 */
1667 brw_bo_flink(last_mt->bo, &old_name);
1668 }
1669
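   /* If the loader handed back the BO we are already using, there is
    * nothing to update.
    */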
1670 if (old_name == buffer->name)
1671 return;
1672
1673 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1674 fprintf(stderr,
1675 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1676 buffer->name, buffer->attachment,
1677 buffer->cpp, buffer->pitch);
1678 }
1679
1680 bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1681 buffer->name);
1682 if (!bo) {
1683 fprintf(stderr,
1684 "Failed to open BO for returned DRI2 buffer "
1685 "(%dx%d, %s, named %d).\n"
1686 "This is likely a bug in the X Server that will lead to a "
1687 "crash soon.\n",
1688 drawable->w, drawable->h, buffer_name, buffer->name);
1689 return;
1690 }
1691
1692 uint32_t tiling, swizzle;
1693 brw_bo_get_tiling(bo, &tiling, &swizzle);
1694
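   /* Wrap the imported BO in a miptree, using the tiling mode queried from
    * the BO so the miptree layout matches the existing surface.
    */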
1695 struct intel_mipmap_tree *mt =
1696 intel_miptree_create_for_bo(brw,
1697 bo,
1698 intel_rb_format(rb),
1699 0,
1700 drawable->w,
1701 drawable->h,
1702 1,
1703 buffer->pitch,
1704 isl_tiling_from_i915_tiling(tiling),
1705 MIPTREE_CREATE_DEFAULT);
1706 if (!mt) {
1707 brw_bo_unreference(bo);
1708 return;
1709 }
1710
1711    /* We got this BO from X11. We can't assume that we have coherent texture
1712 * access because X may suddenly decide to use it for scan-out which would
1713 * destroy coherency.
1714 */
1715 bo->cache_coherent = false;
1716
1717 if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1718 drawable->w, drawable->h,
1719 buffer->pitch)) {
1720 brw_bo_unreference(bo);
1721 intel_miptree_release(&mt);
1722 return;
1723 }
1724
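   /* For a multisampled front buffer, the window system's surface is
    * single-sampled; bring its contents up into the multisampled
    * renderbuffer so front-buffer rendering starts from the current image.
    */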
1725 if (_mesa_is_front_buffer_drawing(fb) &&
1726 (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1727 buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1728 rb->Base.Base.NumSamples > 1) {
1729 intel_renderbuffer_upsample(brw, rb);
1730 }
1731
1732 assert(rb->mt);
1733
1734 brw_bo_unreference(bo);
1735 }
1736
1737 /**
1738  * \brief Attach a DRI image's buffer object to a renderbuffer.
1739  *
1740  * Wrap the \c __DRIimage returned by the image loader in a miptree and
1741  * attach it to the window-system renderbuffer, unless the renderbuffer is
1742  * already bound to that buffer object.
1743  *
1744  * This is called from intel_update_image_buffers(), which in turn is
1745  * called from intel_update_renderbuffers().
1746  *
1747  * \param drawable    Drawable whose buffer is being updated.
1748  * \param rb          Renderbuffer to receive the buffer.
1749  * \param buffer      Image returned by the image loader.
1750  * \param buffer_type Whether the image is a front, back, or shared buffer.
1751  */
1752
1753 static void
1754 intel_update_image_buffer(struct brw_context *intel,
1755 __DRIdrawable *drawable,
1756 struct intel_renderbuffer *rb,
1757 __DRIimage *buffer,
1758 enum __DRIimageBufferMask buffer_type)
1759 {
1760 struct gl_framebuffer *fb = drawable->driverPrivate;
1761
1762 if (!rb || !buffer->bo)
1763 return;
1764
1765 unsigned num_samples = rb->Base.Base.NumSamples;
1766
1767    /* Check whether we're already bound to the right
1768     * buffer object.
1769 */
1770 struct intel_mipmap_tree *last_mt;
1771 if (num_samples == 0)
1772 last_mt = rb->mt;
1773 else
1774 last_mt = rb->singlesample_mt;
1775
1776 if (last_mt && last_mt->bo == buffer->bo) {
1777 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1778 intel_miptree_make_shareable(intel, last_mt);
1779 }
1780 return;
1781 }
1782
1783 /* Only allow internal compression if samples == 0. For multisampled
1784 * window system buffers, the only thing the single-sampled buffer is used
1785 * for is as a resolve target. If we do any compression beyond what is
1786 * supported by the window system, we will just have to resolve so it's
1787 * probably better to just not bother.
1788 */
1789 const bool allow_internal_aux = (num_samples == 0);
1790
1791 struct intel_mipmap_tree *mt =
1792 intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1793 intel_rb_format(rb),
1794 allow_internal_aux);
1795 if (!mt)
1796 return;
1797
1798 if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1799 buffer->width, buffer->height,
1800 buffer->pitch)) {
1801 intel_miptree_release(&mt);
1802 return;
1803 }
1804
1805 if (_mesa_is_front_buffer_drawing(fb) &&
1806 buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1807 rb->Base.Base.NumSamples > 1) {
1808 intel_renderbuffer_upsample(intel, rb);
1809 }
1810
1811 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1812 /* The compositor and the application may access this image
1813 * concurrently. The display hardware may even scanout the image while
1814 * the GPU is rendering to it. Aux surfaces cause difficulty with
1815 * concurrent access, so permanently disable aux for this miptree.
1816 *
1817 * Perhaps we could improve overall application performance by
1818 * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1819 * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1820 * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1821 * approach to be highly dependent on the application's GL usage.
1822 *
1823  * I [chadv] expect clever disabling/re-enabling to be counterproductive
1824  * in the use cases I care about: applications that render near-realtime
1825  * handwriting to the surface while possibly undergoing simultaneous
1826  * scanout as a display plane. The app requires low
1827 * render latency. Even though the app spends most of its time in
1828 * shared-buffer mode, it also frequently transitions between
1829 * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1830  * mode. Visual stutter during the transitions should be avoided.
1831 *
1832 * In this case, I [chadv] believe reducing the GPU workload at
1833 * shared-buffer/double-buffer transitions would offer a smoother app
1834 * experience than any savings due to aux compression. But I've
1835 * collected no data to prove my theory.
1836 */
1837 intel_miptree_make_shareable(intel, mt);
1838 }
1839 }
1840
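/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which buffers to request, examine the renderbuffers attached
 * to the drawable's framebuffer, then request the matching images from the
 * image loader and hand each returned image to intel_update_image_buffer().
 *
 * This is called from intel_update_renderbuffers().
 */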
1841 static void
1842 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1843 {
1844 struct gl_framebuffer *fb = drawable->driverPrivate;
1845 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1846 struct intel_renderbuffer *front_rb;
1847 struct intel_renderbuffer *back_rb;
1848 struct __DRIimageList images;
1849 mesa_format format;
1850 uint32_t buffer_mask = 0;
1851 int ret;
1852
1853 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1854 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1855
1856 if (back_rb)
1857 format = intel_rb_format(back_rb);
1858 else if (front_rb)
1859 format = intel_rb_format(front_rb);
1860 else
1861 return;
1862
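   /* Request a front image only when front-buffer rendering or reading is
    * in effect, or when there is no back buffer at all.
    */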
1863 if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1864 _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1865 buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1866 }
1867
1868 if (back_rb)
1869 buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1870
1871 ret = dri_screen->image.loader->getBuffers(drawable,
1872 driGLFormatToImageFormat(format),
1873 &drawable->dri2.stamp,
1874 drawable->loaderPrivate,
1875 buffer_mask,
1876 &images);
1877 if (!ret)
1878 return;
1879
1880 if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1881 drawable->w = images.front->width;
1882 drawable->h = images.front->height;
1883 intel_update_image_buffer(brw,
1884 drawable,
1885 front_rb,
1886 images.front,
1887 __DRI_IMAGE_BUFFER_FRONT);
1888 }
1889
1890 if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1891 drawable->w = images.back->width;
1892 drawable->h = images.back->height;
1893 intel_update_image_buffer(brw,
1894 drawable,
1895 back_rb,
1896 images.back,
1897 __DRI_IMAGE_BUFFER_BACK);
1898 }
1899
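   /* When the loader returns a shared (single-buffered) image it must be
    * the only image returned, and it is delivered in the back-buffer slot.
    */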
1900 if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1901 assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
1902 drawable->w = images.back->width;
1903 drawable->h = images.back->height;
1904 intel_update_image_buffer(brw,
1905 drawable,
1906 back_rb,
1907 images.back,
1908 __DRI_IMAGE_BUFFER_SHARED);
1909 brw->is_shared_buffer_bound = true;
1910 } else {
1911 brw->is_shared_buffer_bound = false;
1912 brw->is_shared_buffer_dirty = false;
1913 }
1914 }