Move compiler.h and imports.h/c from src/mesa/main into src/util
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "util/imports.h"
41 #include "main/macros.h"
42 #include "main/points.h"
43 #include "main/version.h"
44 #include "main/vtxfmt.h"
45 #include "main/texobj.h"
46 #include "main/framebuffer.h"
47 #include "main/stencil.h"
48 #include "main/state.h"
49 #include "main/spirv_extensions.h"
50
51 #include "vbo/vbo.h"
52
53 #include "drivers/common/driverfuncs.h"
54 #include "drivers/common/meta.h"
55 #include "utils.h"
56
57 #include "brw_context.h"
58 #include "brw_defines.h"
59 #include "brw_blorp.h"
60 #include "brw_draw.h"
61 #include "brw_state.h"
62
63 #include "intel_batchbuffer.h"
64 #include "intel_buffer_objects.h"
65 #include "intel_buffers.h"
66 #include "intel_fbo.h"
67 #include "intel_mipmap_tree.h"
68 #include "intel_pixel.h"
69 #include "intel_image.h"
70 #include "intel_tex.h"
71 #include "intel_tex_obj.h"
72
73 #include "swrast_setup/swrast_setup.h"
74 #include "tnl/tnl.h"
75 #include "tnl/t_pipeline.h"
76 #include "util/ralloc.h"
77 #include "util/debug.h"
78 #include "util/disk_cache.h"
79 #include "isl/isl.h"
80
81 #include "common/gen_defines.h"
82
83 #include "compiler/spirv/nir_spirv.h"
84 /***************************************
85 * Mesa's Driver Functions
86 ***************************************/
87
88 const char *const brw_vendor_string = "Intel Open Source Technology Center";
89
90 static const char *
91 get_bsw_model(const struct intel_screen *screen)
92 {
93 switch (screen->eu_total) {
94 case 16:
95 return "405";
96 case 12:
97 return "400";
98 default:
99 return "   ";   /* three spaces, so the 3-byte model substitution below stays in bounds */
100 }
101 }
102
103 const char *
104 brw_get_renderer_string(const struct intel_screen *screen)
105 {
106 static char buf[128];
107 const char *name = gen_get_device_name(screen->deviceID);
108
109 if (!name)
110 name = "Intel Unknown";
111
112 snprintf(buf, sizeof(buf), "Mesa DRI %s", name);
113
114 /* Braswell branding is funny, so we have to fix it up here */
115 if (screen->deviceID == 0x22B1) {
116 char *needle = strstr(buf, "XXX");
117 if (needle)
118 memcpy(needle, get_bsw_model(screen), 3);
119 }
120
121 return buf;
122 }
123
124 static const GLubyte *
125 intel_get_string(struct gl_context * ctx, GLenum name)
126 {
127 const struct brw_context *const brw = brw_context(ctx);
128
129 switch (name) {
130 case GL_VENDOR:
131 return (GLubyte *) brw_vendor_string;
132
133 case GL_RENDERER:
134 return
135 (GLubyte *) brw_get_renderer_string(brw->screen);
136
137 default:
138 return NULL;
139 }
140 }
141
142 static void
143 brw_set_background_context(struct gl_context *ctx,
144 struct util_queue_monitoring *queue_info)
145 {
146 struct brw_context *brw = brw_context(ctx);
147 __DRIcontext *driContext = brw->driContext;
148 __DRIscreen *driScreen = driContext->driScreenPriv;
149 const __DRIbackgroundCallableExtension *backgroundCallable =
150 driScreen->dri2.backgroundCallable;
151
152 /* Note: Mesa will only call this function if we've called
153 * _mesa_enable_multithreading(). We only do that if the loader exposed
154 * the __DRI_BACKGROUND_CALLABLE extension. So we know that
155 * backgroundCallable is not NULL.
156 */
157 backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
158 }
159
160 static void
161 intel_viewport(struct gl_context *ctx)
162 {
163 struct brw_context *brw = brw_context(ctx);
164 __DRIcontext *driContext = brw->driContext;
165
166 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
167 if (driContext->driDrawablePriv)
168 dri2InvalidateDrawable(driContext->driDrawablePriv);
169 if (driContext->driReadablePriv)
170 dri2InvalidateDrawable(driContext->driReadablePriv);
171 }
172 }
173
174 static void
175 intel_update_framebuffer(struct gl_context *ctx,
176 struct gl_framebuffer *fb)
177 {
178 struct brw_context *brw = brw_context(ctx);
179
180 /* Quantize the derived default number of samples
181 */
182 fb->DefaultGeometry._NumSamples =
183 intel_quantize_num_samples(brw->screen,
184 fb->DefaultGeometry.NumSamples);
185 }
186
187 static void
188 intel_update_state(struct gl_context * ctx)
189 {
190 GLuint new_state = ctx->NewState;
191 struct brw_context *brw = brw_context(ctx);
192
193 if (ctx->swrast_context)
194 _swrast_InvalidateState(ctx, new_state);
195
196 brw->NewGLState |= new_state;
197
198 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
199 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
200
201 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
202 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
203 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
204 brw->stencil_write_enabled =
205 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
206 }
207
208 if (new_state & _NEW_POLYGON)
209 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
210
211 if (new_state & _NEW_BUFFERS) {
212 intel_update_framebuffer(ctx, ctx->DrawBuffer);
213 if (ctx->DrawBuffer != ctx->ReadBuffer)
214 intel_update_framebuffer(ctx, ctx->ReadBuffer);
215 }
216 }
217
218 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
219
220 static void
221 intel_flush_front(struct gl_context *ctx)
222 {
223 struct brw_context *brw = brw_context(ctx);
224 __DRIcontext *driContext = brw->driContext;
225 __DRIdrawable *driDrawable = driContext->driDrawablePriv;
226 __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
227
228 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
229 if (flushFront(dri_screen) && driDrawable &&
230 driDrawable->loaderPrivate) {
231
232 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
233 *
234 * This potentially resolves both front and back buffer. It
235 * is unnecessary to resolve the back, but harms nothing except
236 * performance. And no one cares about front-buffer render
237 * performance.
238 */
239 intel_resolve_for_dri2_flush(brw, driDrawable);
240 intel_batchbuffer_flush(brw);
241
242 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
243
244 /* We set the dirty bit in intel_prepare_render() if we're
245 * front buffer rendering once we get there.
246 */
247 brw->front_buffer_dirty = false;
248 }
249 }
250 }
251
252 static void
253 brw_display_shared_buffer(struct brw_context *brw)
254 {
255 __DRIcontext *dri_context = brw->driContext;
256 __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
257 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
258 int fence_fd = -1;
259
260 if (!brw->is_shared_buffer_bound)
261 return;
262
263 if (!brw->is_shared_buffer_dirty)
264 return;
265
266 if (brw->screen->has_exec_fence) {
267 /* This function is always called during a flush operation, so there is
268 * no need to flush again here. But we want to provide a fence_fd to the
269 * loader, and a redundant flush is the easiest way to acquire one.
270 */
271 if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
272 return;
273 }
274
275 dri_screen->mutableRenderBuffer.loader
276 ->displaySharedBuffer(dri_drawable, fence_fd,
277 dri_drawable->loaderPrivate);
278 brw->is_shared_buffer_dirty = false;
279 }
280
281 static void
282 intel_glFlush(struct gl_context *ctx)
283 {
284 struct brw_context *brw = brw_context(ctx);
285
286 intel_batchbuffer_flush(brw);
287 intel_flush_front(ctx);
288 brw_display_shared_buffer(brw);
289 brw->need_flush_throttle = true;
290 }
291
292 static void
293 intel_glEnable(struct gl_context *ctx, GLenum cap, GLboolean state)
294 {
295 struct brw_context *brw = brw_context(ctx);
296
297 switch (cap) {
298 case GL_BLACKHOLE_RENDER_INTEL:
299 brw->frontend_noop = state;
300 intel_batchbuffer_flush(brw);
301 intel_batchbuffer_maybe_noop(brw);
302 /* Because we started previous batches with a potential
303 * MI_BATCH_BUFFER_END if NOOP was enabled, anything emitted after
304 * that point never made it to the HW. So when the blackhole state
305 * changes from NOOP->!NOOP, reupload the entire state.
306 */
307 if (!brw->frontend_noop) {
308 brw->NewGLState = ~0u;
309 brw->ctx.NewDriverState = ~0ull;
310 }
311 break;
312 default:
313 break;
314 }
315 }
316
317 static void
318 intel_finish(struct gl_context * ctx)
319 {
320 struct brw_context *brw = brw_context(ctx);
321
322 intel_glFlush(ctx);
323
324 if (brw->batch.last_bo)
325 brw_bo_wait_rendering(brw->batch.last_bo);
326 }
327
328 static void
329 brw_init_driver_functions(struct brw_context *brw,
330 struct dd_function_table *functions)
331 {
332 const struct gen_device_info *devinfo = &brw->screen->devinfo;
333
334 _mesa_init_driver_functions(functions);
335
336 /* GLX uses DRI2 invalidate events to handle window resizing.
337 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
338 * which doesn't provide a mechanism for snooping the event queues.
339 *
340 * So EGL still relies on viewport hacks to handle window resizing.
341 * This should go away with DRI3000.
342 */
343 if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
344 functions->Viewport = intel_viewport;
345
346 functions->Enable = intel_glEnable;
347 functions->Flush = intel_glFlush;
348 functions->Finish = intel_finish;
349 functions->GetString = intel_get_string;
350 functions->UpdateState = intel_update_state;
351
352 brw_init_draw_functions(functions);
353 intelInitTextureFuncs(functions);
354 intelInitTextureImageFuncs(functions);
355 intelInitTextureCopyImageFuncs(functions);
356 intelInitCopyImageFuncs(functions);
357 intelInitClearFuncs(functions);
358 intelInitBufferFuncs(functions);
359 intelInitPixelFuncs(functions);
360 intelInitBufferObjectFuncs(functions);
361 brw_init_syncobj_functions(functions);
362 brw_init_object_purgeable_functions(functions);
363
364 brwInitFragProgFuncs( functions );
365 brw_init_common_queryobj_functions(functions);
366 if (devinfo->gen >= 8 || devinfo->is_haswell)
367 hsw_init_queryobj_functions(functions);
368 else if (devinfo->gen >= 6)
369 gen6_init_queryobj_functions(functions);
370 else
371 gen4_init_queryobj_functions(functions);
372 brw_init_compute_functions(functions);
373 brw_init_conditional_render_functions(functions);
374
375 functions->GenerateMipmap = brw_generate_mipmap;
376
377 functions->QueryInternalFormat = brw_query_internal_format;
378
379 functions->NewTransformFeedback = brw_new_transform_feedback;
380 functions->DeleteTransformFeedback = brw_delete_transform_feedback;
381 if (can_do_mi_math_and_lrr(brw->screen)) {
382 functions->BeginTransformFeedback = hsw_begin_transform_feedback;
383 functions->EndTransformFeedback = hsw_end_transform_feedback;
384 functions->PauseTransformFeedback = hsw_pause_transform_feedback;
385 functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
386 } else if (devinfo->gen >= 7) {
387 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
388 functions->EndTransformFeedback = gen7_end_transform_feedback;
389 functions->PauseTransformFeedback = gen7_pause_transform_feedback;
390 functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
391 functions->GetTransformFeedbackVertexCount =
392 brw_get_transform_feedback_vertex_count;
393 } else {
394 functions->BeginTransformFeedback = brw_begin_transform_feedback;
395 functions->EndTransformFeedback = brw_end_transform_feedback;
396 functions->PauseTransformFeedback = brw_pause_transform_feedback;
397 functions->ResumeTransformFeedback = brw_resume_transform_feedback;
398 functions->GetTransformFeedbackVertexCount =
399 brw_get_transform_feedback_vertex_count;
400 }
401
402 if (devinfo->gen >= 6)
403 functions->GetSamplePosition = gen6_get_sample_position;
404
405 /* GL_ARB_get_program_binary */
406 brw_program_binary_init(brw->screen->deviceID);
407 functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
408 functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
409 functions->ProgramBinaryDeserializeDriverBlob =
410 brw_deserialize_program_binary;
411
412 if (brw->screen->disk_cache) {
413 functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
414 }
415
416 functions->SetBackgroundContext = brw_set_background_context;
417 }
418
419 static void
420 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
421 {
422 const struct gen_device_info *devinfo = &brw->screen->devinfo;
423 struct gl_context *ctx = &brw->ctx;
424
425 /* The following SPIR-V capabilities are only supported on gen7+. In theory
426 * you should enable the extension only on gen7+, but just in case let's
427 * assert it.
428 */
429 assert(devinfo->gen >= 7);
430
431 ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
432 ctx->Const.SpirVCapabilities.draw_parameters = true;
433 ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
434 ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
435 ctx->Const.SpirVCapabilities.image_write_without_format = true;
436 ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
437 ctx->Const.SpirVCapabilities.tessellation = true;
438 ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
439 ctx->Const.SpirVCapabilities.variable_pointers = true;
440 ctx->Const.SpirVCapabilities.integer_functions2 = devinfo->gen >= 8;
441 }
442
443 static void
444 brw_initialize_context_constants(struct brw_context *brw)
445 {
446 const struct gen_device_info *devinfo = &brw->screen->devinfo;
447 struct gl_context *ctx = &brw->ctx;
448 const struct brw_compiler *compiler = brw->screen->compiler;
449
450 const bool stage_exists[MESA_SHADER_STAGES] = {
451 [MESA_SHADER_VERTEX] = true,
452 [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
453 [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
454 [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
455 [MESA_SHADER_FRAGMENT] = true,
456 [MESA_SHADER_COMPUTE] =
457 (_mesa_is_desktop_gl(ctx) &&
458 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
459 (ctx->API == API_OPENGLES2 &&
460 ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
461 };
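   /* The 1024 and 128 thresholds above are the minimum MAX_COMPUTE_WORK_GROUP_SIZE
    * (x/y) values required by desktop GL with ARB_compute_shader and by
    * OpenGL ES 3.1 respectively, so compute is only exposed once
    * brw_initialize_cs_context_constants() has shown the hardware meets them.
    */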
462
463 unsigned num_stages = 0;
464 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
465 if (stage_exists[i])
466 num_stages++;
467 }
468
469 unsigned max_samplers =
470 devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;
471
472 ctx->Const.MaxDualSourceDrawBuffers = 1;
473 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
474 ctx->Const.MaxCombinedShaderOutputResources =
475 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
476
477 /* The timestamp register we can read for glGetTimestamp() is
478 * sometimes only 32 bits, before scaling to nanoseconds (depending
479 * on kernel).
480 *
481 * Once scaled to nanoseconds the timestamp would roll over at a
482 * non-power-of-two, so an application couldn't use
483 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
484 * report 36 bits and truncate at that (rolling over 5 times as
485 * often as the HW counter), and when the 32-bit counter rolls
486 * over, it happens to also be at a rollover in the reported value
487 * from near (1<<36) to 0.
488 *
489 * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
490 * rolls over every ~69 seconds.
491 */
492 ctx->Const.QueryCounterBits.Timestamp = 36;
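   /* Rough arithmetic behind the figures above (assuming the ~80 ns raw
    * timestamp tick used by these GPUs): 2^32 ticks * 80 ns is roughly 343.6 s
    * for the raw 32-bit counter, and 2^36 ns is roughly 68.7 s for the
    * reported 36-bit nanosecond value.
    */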
493
494 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
495 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
496 if (devinfo->gen >= 7) {
497 ctx->Const.MaxRenderbufferSize = 16384;
498 ctx->Const.MaxTextureSize = 16384;
499 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
500 } else {
501 ctx->Const.MaxRenderbufferSize = 8192;
502 ctx->Const.MaxTextureSize = 8192;
503 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
504 }
505 ctx->Const.Max3DTextureLevels = 12; /* 2048 */
506 ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
507 ctx->Const.MaxTextureMbytes = 1536;
508 ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
509 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
510 ctx->Const.MaxTextureLodBias = 15.0;
511 ctx->Const.StripTextureBorder = true;
512 if (devinfo->gen >= 7) {
513 ctx->Const.MaxProgramTextureGatherComponents = 4;
514 ctx->Const.MinProgramTextureGatherOffset = -32;
515 ctx->Const.MaxProgramTextureGatherOffset = 31;
516 } else if (devinfo->gen == 6) {
517 ctx->Const.MaxProgramTextureGatherComponents = 1;
518 ctx->Const.MinProgramTextureGatherOffset = -8;
519 ctx->Const.MaxProgramTextureGatherOffset = 7;
520 }
521
522 ctx->Const.MaxUniformBlockSize = 65536;
523
524 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
525 struct gl_program_constants *prog = &ctx->Const.Program[i];
526
527 if (!stage_exists[i])
528 continue;
529
530 prog->MaxTextureImageUnits = max_samplers;
531
532 prog->MaxUniformBlocks = BRW_MAX_UBO;
533 prog->MaxCombinedUniformComponents =
534 prog->MaxUniformComponents +
535 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
536
537 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
538 prog->MaxAtomicBuffers = BRW_MAX_ABO;
539 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
540 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
541 }
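   /* For reference: with the 64 kB MaxUniformBlockSize set above, each UBO
    * binding contributes 65536 / 4 = 16384 uniform components to
    * MaxCombinedUniformComponents, on top of the default-block components.
    */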
542
543 ctx->Const.MaxTextureUnits =
544 MIN2(ctx->Const.MaxTextureCoordUnits,
545 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
546
547 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
548 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
549 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
550 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
551 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
552 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
553 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
554
555
556 /* Hardware only supports a limited number of transform feedback buffers.
557 * So we need to override the Mesa default (which is based only on software
558 * limits).
559 */
560 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
561
562 /* On Gen6, in the worst case, we use up one binding table entry per
563 * transform feedback component (see comments above the definition of
564 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
565 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
566 * BRW_MAX_SOL_BINDINGS.
567 *
568 * In "separate components" mode, we need to divide this value by
569 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
570 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
571 */
572 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
573 ctx->Const.MaxTransformFeedbackSeparateComponents =
574 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
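   /* Assuming the usual definitions in brw_context.h (BRW_MAX_SOL_BINDINGS = 64,
    * BRW_MAX_SOL_BUFFERS = 4), this works out to 64 interleaved components and
    * 64 / 4 = 16 separate components per buffer.
    */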
575
576 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
577 !can_do_mi_math_and_lrr(brw->screen);
578
579 int max_samples;
580 const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
581 const int clamp_max_samples =
582 driQueryOptioni(&brw->optionCache, "clamp_max_samples");
583
584 if (clamp_max_samples < 0) {
585 max_samples = msaa_modes[0];
586 } else {
587 /* Select the largest supported MSAA mode that does not exceed
588 * clamp_max_samples.
589 */
590 max_samples = 0;
591 for (int i = 0; msaa_modes[i] != 0; ++i) {
592 if (msaa_modes[i] <= clamp_max_samples) {
593 max_samples = msaa_modes[i];
594 break;
595 }
596 }
597 }
598
599 ctx->Const.MaxSamples = max_samples;
600 ctx->Const.MaxColorTextureSamples = max_samples;
601 ctx->Const.MaxDepthTextureSamples = max_samples;
602 ctx->Const.MaxIntegerSamples = max_samples;
603 ctx->Const.MaxImageSamples = 0;
604
605 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
606 * to map indices of rectangular grid to sample numbers within a pixel.
607 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
608 * extension implementation. For more details see the comment above
609 * gen6_set_sample_maps() definition.
610 */
611 gen6_set_sample_maps(ctx);
612
613 ctx->Const.MinLineWidth = 1.0;
614 ctx->Const.MinLineWidthAA = 1.0;
615 if (devinfo->gen >= 6) {
616 ctx->Const.MaxLineWidth = 7.375;
617 ctx->Const.MaxLineWidthAA = 7.375;
618 ctx->Const.LineWidthGranularity = 0.125;
619 } else {
620 ctx->Const.MaxLineWidth = 7.0;
621 ctx->Const.MaxLineWidthAA = 7.0;
622 ctx->Const.LineWidthGranularity = 0.5;
623 }
624
625 /* For non-antialiased lines, we have to round the line width to the
626 * nearest whole number. Make sure that we don't advertise a line
627 * width that, when rounded, will be beyond the actual hardware
628 * maximum.
629 */
630 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
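   /* E.g. the 7.375 limit set above rounds to 7.0, so the assertion holds;
    * advertising 7.5 would round up to 8 and exceed the hardware maximum.
    */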
631
632 ctx->Const.MinPointSize = 1.0;
633 ctx->Const.MinPointSizeAA = 1.0;
634 ctx->Const.MaxPointSize = 255.0;
635 ctx->Const.MaxPointSizeAA = 255.0;
636 ctx->Const.PointSizeGranularity = 1.0;
637
638 if (devinfo->gen >= 5 || devinfo->is_g4x)
639 ctx->Const.MaxClipPlanes = 8;
640
641 ctx->Const.GLSLFragCoordIsSysVal = true;
642 ctx->Const.GLSLFrontFacingIsSysVal = true;
643 ctx->Const.GLSLTessLevelsAsInputs = true;
644 ctx->Const.PrimitiveRestartForPatches = true;
645
646 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
647 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
648 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
649 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
650 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
651 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
652 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
653 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
654 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
655 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
656 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
657 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
658 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
659 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
660
661 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
662 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
663 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
664 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
665 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
666 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
667 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
668 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
669 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
670 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
671 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
672
673 /* Fragment shaders use real, 32-bit twos-complement integers for all
674 * integer types.
675 */
676 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
677 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
678 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
679 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
680 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
681
682 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
683 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
684 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
685 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
686 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
687
688 /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
689 * but we're not sure how it's actually done for vertex order, which
690 * affects the provoking vertex decision. Always use the last-vertex
691 * convention for quad primitives, which works as expected for now.
692 */
693 if (devinfo->gen >= 6)
694 ctx->Const.QuadsFollowProvokingVertexConvention = false;
695
696 ctx->Const.NativeIntegers = true;
697
698 /* Regarding the CMP instruction, the Ivybridge PRM says:
699 *
700 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
701 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
702 * 0xFFFFFFFF) is assigned to dst."
703 *
704 * but PRMs for earlier generations say
705 *
706 * "In dword format, one GRF may store up to 8 results. When the register
707 * is used later as a vector of Booleans, as only LSB at each channel
708 * contains meaning [sic] data, software should make sure all higher bits
709 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
710 *
711 * We select the representation of a true boolean uniform to be ~0, and fix
712 * the results of Gen <= 5 CMP instructions with -(result & 1).
713 */
714 ctx->Const.UniformBooleanTrue = ~0;
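   /* Worked example of the Gen <= 5 fixup mentioned above: only the LSB of
    * each channel is meaningful there, so masking and negating the result,
    * -(result & 1), yields 0xFFFFFFFF (~0) for true and 0 for false, matching
    * the representation chosen here.
    */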
715
716 /* From the gen4 PRM, volume 4 page 127:
717 *
718 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
719 * the base address of the first element of the surface, computed in
720 * software by adding the surface base address to the byte offset of
721 * the element in the buffer."
722 *
723 * However, unaligned accesses are slower, so enforce buffer alignment.
724 *
725 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
726 * restriction: the start of the buffer needs to be 32B aligned.
727 */
728 ctx->Const.UniformBufferOffsetAlignment = 32;
729
730 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
731 * that we can safely have the CPU and GPU writing the same SSBO on
732 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
733 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
734 * be updating disjoint regions of the buffer simultaneously and that will
735 * break if the regions overlap the same cacheline.
736 */
737 ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
738 ctx->Const.TextureBufferOffsetAlignment = 16;
739 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
740
741 if (devinfo->gen >= 6) {
742 ctx->Const.MaxVarying = 32;
743 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
744 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
745 compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
746 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
747 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
748 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
749 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
750 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
751 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
752 }
753
754 /* We want the GLSL compiler to emit code that uses condition codes */
755 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
756 ctx->Const.ShaderCompilerOptions[i] =
757 brw->screen->compiler->glsl_compiler_options[i];
758 }
759
760 if (devinfo->gen >= 7) {
761 ctx->Const.MaxViewportWidth = 32768;
762 ctx->Const.MaxViewportHeight = 32768;
763 }
764
765 /* ARB_viewport_array, OES_viewport_array */
766 if (devinfo->gen >= 6) {
767 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
768 ctx->Const.ViewportSubpixelBits = 8;
769
770 /* Cast to float before negating because MaxViewportWidth is unsigned.
771 */
772 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
773 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
774 }
775
776 /* ARB_gpu_shader5 */
777 if (devinfo->gen >= 7)
778 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
779
780 /* ARB_framebuffer_no_attachments */
781 ctx->Const.MaxFramebufferWidth = 16384;
782 ctx->Const.MaxFramebufferHeight = 16384;
783 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
784 ctx->Const.MaxFramebufferSamples = max_samples;
785
786 /* OES_primitive_bounding_box */
787 ctx->Const.NoPrimitiveBoundingBoxOutput = true;
788
789 /* TODO: We should be able to use STD430 packing by default on all hardware
790 * but some piglit tests [1] currently fail on SNB when this is enabled.
791 * The problem is that the message we use for uniform pulls in the vec4
792 * back-end on SNB is the OWORD block load instruction, which takes its
793 * offset in units of OWORDs (16 bytes). On IVB+, we use the
794 * sampler which doesn't have these restrictions.
795 *
796 * In the scalar back-end, we use the sampler for dynamic uniform loads and
797 * pull an entire cache line at a time for constant offset loads both of
798 * which support almost any alignment.
799 *
800 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
801 */
802 if (devinfo->gen >= 7)
803 ctx->Const.UseSTD430AsDefaultPacking = true;
804
805 if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
806 ctx->Const.AllowMappedBuffersDuringExecution = true;
807
808 /* GL_ARB_get_program_binary */
809 ctx->Const.NumProgramBinaryFormats = 1;
810 }
811
812 static void
813 brw_initialize_cs_context_constants(struct brw_context *brw)
814 {
815 struct gl_context *ctx = &brw->ctx;
816 const struct intel_screen *screen = brw->screen;
817 struct gen_device_info *devinfo = &brw->screen->devinfo;
818
819 /* FINISHME: Do this for all platforms that the kernel supports */
820 if (devinfo->is_cherryview &&
821 screen->subslice_total > 0 && screen->eu_total > 0) {
822 /* Logical CS threads = EUs per subslice * 7 threads per EU */
823 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
824
825 /* Fuse configurations may give more threads than expected, never less. */
826 if (max_cs_threads > devinfo->max_cs_threads)
827 devinfo->max_cs_threads = max_cs_threads;
828 }
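   /* For example, a Cherryview part with 16 EUs spread over 2 subslices would
    * give 16 / 2 * 7 = 56 logical CS threads here (hypothetical numbers, just
    * to illustrate the formula above).
    */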
829
830 /* Maximum number of scalar compute shader invocations that can be run in
831 * parallel in the same subslice assuming SIMD32 dispatch.
832 *
833 * We don't advertise more than 64 threads, because we are limited to 64 by
834 * our usage of thread_width_max in the gpgpu walker command. This only
835 * currently impacts Haswell, which otherwise might be able to advertise 70
836 * threads. With SIMD32 and 64 threads, Haswell still provides twice the
837 * number of invocations required for ARB_compute_shader.
838 */
839 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
840 const uint32_t max_invocations = 32 * max_threads;
841 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
842 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
843 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
844 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
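   /* E.g. with the full 64 threads this yields 32 * 64 = 2048 invocations,
    * twice the 1024 required by ARB_compute_shader, as noted above.
    */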
845 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
846 }
847
848 /**
849 * Process driconf (drirc) options, setting appropriate context flags.
850 *
851 * intelInitExtensions still pokes at optionCache directly, in order to
852 * avoid advertising various extensions. No flags are set, so it makes
853 * sense to continue doing that there.
854 */
855 static void
856 brw_process_driconf_options(struct brw_context *brw)
857 {
858 const struct gen_device_info *devinfo = &brw->screen->devinfo;
859 struct gl_context *ctx = &brw->ctx;
860
861 driOptionCache *options = &brw->optionCache;
862 driParseConfigFiles(options, &brw->screen->optionCache,
863 brw->driContext->driScreenPriv->myNum,
864 "i965", NULL, NULL, 0);
865
866 if (INTEL_DEBUG & DEBUG_NO_HIZ) {
867 brw->has_hiz = false;
868 /* On gen6, you can only do separate stencil with HIZ. */
869 if (devinfo->gen == 6)
870 brw->has_separate_stencil = false;
871 }
872
873 if (driQueryOptionb(options, "mesa_no_error"))
874 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
875
876 if (driQueryOptionb(options, "always_flush_batch")) {
877 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
878 brw->always_flush_batch = true;
879 }
880
881 if (driQueryOptionb(options, "always_flush_cache")) {
882 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
883 brw->always_flush_cache = true;
884 }
885
886 if (driQueryOptionb(options, "disable_throttling")) {
887 fprintf(stderr, "disabling flush throttling\n");
888 brw->disable_throttling = true;
889 }
890
891 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
892
893 if (driQueryOptionb(&brw->optionCache, "precise_trig"))
894 brw->screen->compiler->precise_trig = true;
895
896 ctx->Const.ForceGLSLExtensionsWarn =
897 driQueryOptionb(options, "force_glsl_extensions_warn");
898
899 ctx->Const.ForceGLSLVersion =
900 driQueryOptioni(options, "force_glsl_version");
901
902 ctx->Const.DisableGLSLLineContinuations =
903 driQueryOptionb(options, "disable_glsl_line_continuations");
904
905 ctx->Const.AllowGLSLExtensionDirectiveMidShader =
906 driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
907
908 ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
909 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
910
911 ctx->Const.AllowHigherCompatVersion =
912 driQueryOptionb(options, "allow_higher_compat_version");
913
914 ctx->Const.ForceGLSLAbsSqrt =
915 driQueryOptionb(options, "force_glsl_abs_sqrt");
916
917 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
918
919 brw->dual_color_blend_by_location =
920 driQueryOptionb(options, "dual_color_blend_by_location");
921
922 ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
923 driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
924
925 ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
926 driComputeOptionsSha1(&brw->screen->optionCache,
927 ctx->Const.dri_config_options_sha1);
928 }
929
930 GLboolean
931 brwCreateContext(gl_api api,
932 const struct gl_config *mesaVis,
933 __DRIcontext *driContextPriv,
934 const struct __DriverContextConfig *ctx_config,
935 unsigned *dri_ctx_error,
936 void *sharedContextPrivate)
937 {
938 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
939 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
940 const struct gen_device_info *devinfo = &screen->devinfo;
941 struct dd_function_table functions;
942
943 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
944 * provides us with context reset notifications.
945 */
946 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
947 __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
948 __DRI_CTX_FLAG_NO_ERROR;
949
950 if (screen->has_context_reset_notification)
951 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
952
953 if (ctx_config->flags & ~allowed_flags) {
954 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
955 return false;
956 }
957
958 if (ctx_config->attribute_mask &
959 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
960 __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
961 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
962 return false;
963 }
964
965 bool notify_reset =
966 ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
967 ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
968
969 struct brw_context *brw = rzalloc(NULL, struct brw_context);
970 if (!brw) {
971 fprintf(stderr, "%s: failed to alloc context\n", __func__);
972 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
973 return false;
974 }
975 brw->perf_ctx = gen_perf_new_context(brw);
976
977 driContextPriv->driverPrivate = brw;
978 brw->driContext = driContextPriv;
979 brw->screen = screen;
980 brw->bufmgr = screen->bufmgr;
981
982 brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
983 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
984
985 brw->has_swizzling = screen->hw_has_swizzling;
986
987 brw->isl_dev = screen->isl_dev;
988
989 brw->vs.base.stage = MESA_SHADER_VERTEX;
990 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
991 brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
992 brw->gs.base.stage = MESA_SHADER_GEOMETRY;
993 brw->wm.base.stage = MESA_SHADER_FRAGMENT;
994 brw->cs.base.stage = MESA_SHADER_COMPUTE;
995
996 brw_init_driver_functions(brw, &functions);
997
998 if (notify_reset)
999 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
1000
1001 brw_process_driconf_options(brw);
1002
1003 if (api == API_OPENGL_CORE &&
1004 driQueryOptionb(&screen->optionCache, "force_compat_profile")) {
1005 api = API_OPENGL_COMPAT;
1006 }
1007
1008 struct gl_context *ctx = &brw->ctx;
1009
1010 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
1011 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1012 fprintf(stderr, "%s: failed to init mesa context\n", __func__);
1013 intelDestroyContext(driContextPriv);
1014 return false;
1015 }
1016
1017 driContextSetFlags(ctx, ctx_config->flags);
1018
1019 /* Initialize the software rasterizer and helper modules.
1020 *
1021 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1022 * software fallbacks (which we have to support on legacy GL to do weird
1023 * glDrawPixels(), glBitmap(), and other functions).
1024 */
1025 if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1026 _swrast_CreateContext(ctx);
1027 }
1028
1029 _vbo_CreateContext(ctx, true);
1030 if (ctx->swrast_context) {
1031 _tnl_CreateContext(ctx);
1032 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1033 _swsetup_CreateContext(ctx);
1034
1035 /* Configure swrast to match hardware characteristics: */
1036 _swrast_allow_pixel_fog(ctx, false);
1037 _swrast_allow_vertex_fog(ctx, true);
1038 }
1039
1040 _mesa_meta_init(ctx);
1041
1042 if (INTEL_DEBUG & DEBUG_PERF)
1043 brw->perf_debug = true;
1044
1045 brw_initialize_cs_context_constants(brw);
1046 brw_initialize_context_constants(brw);
1047
1048 ctx->Const.ResetStrategy = notify_reset
1049 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1050
1051 /* Reinitialize the context point state. It depends on ctx->Const values. */
1052 _mesa_init_point(ctx);
1053
1054 intel_fbo_init(brw);
1055
1056 intel_batchbuffer_init(brw);
1057
1058 /* Create a new hardware context. Using a hardware context means that
1059 * our GPU state will be saved/restored on context switch, allowing us
1060 * to assume that the GPU is in the same state we left it in.
1061 *
1062 * This is required for transform feedback buffer offsets, query objects,
1063 * and also allows us to reduce how much state we have to emit.
1064 */
1065 brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1066 if (!brw->hw_ctx && devinfo->gen >= 6) {
1067 fprintf(stderr, "Failed to create hardware context.\n");
1068 intelDestroyContext(driContextPriv);
1069 return false;
1070 }
1071
1072 if (brw->hw_ctx) {
1073 int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1074 if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1075 switch (ctx_config->priority) {
1076 case __DRI_CTX_PRIORITY_LOW:
1077 hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1078 break;
1079 case __DRI_CTX_PRIORITY_HIGH:
1080 hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1081 break;
1082 }
1083 }
1084 if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1085 brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1086 fprintf(stderr,
1087 "Failed to set priority [%d:%d] for hardware context.\n",
1088 ctx_config->priority, hw_priority);
1089 intelDestroyContext(driContextPriv);
1090 return false;
1091 }
1092 }
1093
1094 if (brw_init_pipe_control(brw, devinfo)) {
1095 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1096 intelDestroyContext(driContextPriv);
1097 return false;
1098 }
1099
1100 brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1101
1102 brw_init_state(brw);
1103
1104 intelInitExtensions(ctx);
1105
1106 brw_init_surface_formats(brw);
1107
1108 brw_blorp_init(brw);
1109
1110 brw->urb.size = devinfo->urb.size;
1111
1112 if (devinfo->gen == 6)
1113 brw->urb.gs_present = false;
1114
1115 brw->prim_restart.in_progress = false;
1116 brw->prim_restart.enable_cut_index = false;
1117 brw->gs.enabled = false;
1118 brw->clip.viewport_count = 1;
1119
1120 brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1121
1122 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1123
1124 ctx->VertexProgram._MaintainTnlProgram = true;
1125 ctx->FragmentProgram._MaintainTexEnvProgram = true;
1126
1127 brw_draw_init( brw );
1128
1129 if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1130 /* Turn on some extra GL_ARB_debug_output generation. */
1131 brw->perf_debug = true;
1132 }
1133
1134 if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1135 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1136 ctx->Const.RobustAccess = GL_TRUE;
1137 }
1138
1139 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1140 brw_init_shader_time(brw);
1141
1142 _mesa_override_extensions(ctx);
1143 _mesa_compute_version(ctx);
1144
1145 /* GL_ARB_gl_spirv */
1146 if (ctx->Extensions.ARB_gl_spirv) {
1147 brw_initialize_spirv_supported_capabilities(brw);
1148
1149 if (ctx->Extensions.ARB_spirv_extensions) {
1150 /* GL_ARB_spirv_extensions */
1151 ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
1152 _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
1153 &ctx->Const.SpirVCapabilities);
1154 }
1155 }
1156
1157 _mesa_initialize_dispatch_tables(ctx);
1158 _mesa_initialize_vbo_vtxfmt(ctx);
1159
1160 if (ctx->Extensions.INTEL_performance_query)
1161 brw_init_performance_queries(brw);
1162
1163 brw->ctx.Cache = brw->screen->disk_cache;
1164
1165 if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1166 driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1167 /* Loader supports multithreading, and so do we. */
1168 _mesa_glthread_init(ctx);
1169 }
1170
1171 return true;
1172 }
1173
1174 void
1175 intelDestroyContext(__DRIcontext * driContextPriv)
1176 {
1177 struct brw_context *brw =
1178 (struct brw_context *) driContextPriv->driverPrivate;
1179 struct gl_context *ctx = &brw->ctx;
1180
1181 GET_CURRENT_CONTEXT(curctx);
1182
1183 if (curctx == NULL) {
1184 /* No current context, but we need one to release the
1185 * renderbuffer surface when we release the framebuffer.
1186 * So temporarily bind this context.
1187 */
1188 _mesa_make_current(ctx, NULL, NULL);
1189 }
1190
1191 _mesa_glthread_destroy(&brw->ctx);
1192
1193 _mesa_meta_free(&brw->ctx);
1194
1195 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1196 /* Force a report. */
1197 brw->shader_time.report_time = 0;
1198
1199 brw_collect_and_report_shader_time(brw);
1200 brw_destroy_shader_time(brw);
1201 }
1202
1203 blorp_finish(&brw->blorp);
1204
1205 brw_destroy_state(brw);
1206 brw_draw_destroy(brw);
1207
1208 brw_bo_unreference(brw->curbe.curbe_bo);
1209
1210 brw_bo_unreference(brw->vs.base.scratch_bo);
1211 brw_bo_unreference(brw->tcs.base.scratch_bo);
1212 brw_bo_unreference(brw->tes.base.scratch_bo);
1213 brw_bo_unreference(brw->gs.base.scratch_bo);
1214 brw_bo_unreference(brw->wm.base.scratch_bo);
1215
1216 brw_bo_unreference(brw->vs.base.push_const_bo);
1217 brw_bo_unreference(brw->tcs.base.push_const_bo);
1218 brw_bo_unreference(brw->tes.base.push_const_bo);
1219 brw_bo_unreference(brw->gs.base.push_const_bo);
1220 brw_bo_unreference(brw->wm.base.push_const_bo);
1221
1222 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1223
1224 if (ctx->swrast_context) {
1225 _swsetup_DestroyContext(&brw->ctx);
1226 _tnl_DestroyContext(&brw->ctx);
1227 }
1228 _vbo_DestroyContext(&brw->ctx);
1229
1230 if (ctx->swrast_context)
1231 _swrast_DestroyContext(&brw->ctx);
1232
1233 brw_fini_pipe_control(brw);
1234 intel_batchbuffer_free(&brw->batch);
1235
1236 brw_bo_unreference(brw->throttle_batch[1]);
1237 brw_bo_unreference(brw->throttle_batch[0]);
1238 brw->throttle_batch[1] = NULL;
1239 brw->throttle_batch[0] = NULL;
1240
1241 driDestroyOptionCache(&brw->optionCache);
1242
1243 /* free the Mesa context */
1244 _mesa_free_context_data(&brw->ctx);
1245
1246 ralloc_free(brw);
1247 driContextPriv->driverPrivate = NULL;
1248 }
1249
1250 GLboolean
1251 intelUnbindContext(__DRIcontext * driContextPriv)
1252 {
1253 struct gl_context *ctx = driContextPriv->driverPrivate;
1254 _mesa_glthread_finish(ctx);
1255
1256 /* Unset current context and dispatch table */
1257 _mesa_make_current(NULL, NULL, NULL);
1258
1259 return true;
1260 }
1261
1262 /**
1263 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1264 * on window system framebuffers.
1265 *
1266 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1267 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1268 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1269 * for a visual where you're guaranteed to be capable, but it turns out that
1270 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1271 * incapable ones, because there's no difference between the two in resources
1272 * used. Applications thus get built that accidentally rely on the default
1273 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1274 * great...
1275 *
1276 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1277 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1278 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1279 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1280 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1281 * and get no sRGB encode (assuming that both kinds of visual are available).
1282 * Thus our choice to support sRGB by default on our visuals for desktop would
1283 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1284 *
1285 * Unfortunately, renderbuffer setup happens before a context is created. So
1286 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1287 * context (without an sRGB visual), we go turn that back off before anyone
1288 * finds out.
1289 */
1290 static void
1291 intel_gles3_srgb_workaround(struct brw_context *brw,
1292 struct gl_framebuffer *fb)
1293 {
1294 struct gl_context *ctx = &brw->ctx;
1295
1296 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1297 return;
1298
1299 for (int i = 0; i < BUFFER_COUNT; i++) {
1300 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1301
1302 /* Check if sRGB was specifically asked for. */
1303 struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1304 if (irb && irb->need_srgb)
1305 return;
1306
1307 if (rb)
1308 rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1309 }
1310 /* Disable sRGB from framebuffers that are not compatible. */
1311 fb->Visual.sRGBCapable = false;
1312 }
1313
1314 GLboolean
1315 intelMakeCurrent(__DRIcontext * driContextPriv,
1316 __DRIdrawable * driDrawPriv,
1317 __DRIdrawable * driReadPriv)
1318 {
1319 struct brw_context *brw;
1320
1321 if (driContextPriv)
1322 brw = (struct brw_context *) driContextPriv->driverPrivate;
1323 else
1324 brw = NULL;
1325
1326 if (driContextPriv) {
1327 struct gl_context *ctx = &brw->ctx;
1328 struct gl_framebuffer *fb, *readFb;
1329
1330 if (driDrawPriv == NULL) {
1331 fb = _mesa_get_incomplete_framebuffer();
1332 } else {
1333 fb = driDrawPriv->driverPrivate;
1334 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1335 }
1336
1337 if (driReadPriv == NULL) {
1338 readFb = _mesa_get_incomplete_framebuffer();
1339 } else {
1340 readFb = driReadPriv->driverPrivate;
1341 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1342 }
1343
1344 /* The sRGB workaround changes the renderbuffer's format. We must change
1345 * the format before the renderbuffer's miptree gets allocated, otherwise
1346 * the formats of the renderbuffer and its miptree will differ.
1347 */
1348 intel_gles3_srgb_workaround(brw, fb);
1349 intel_gles3_srgb_workaround(brw, readFb);
1350
1351 /* If the context viewport hasn't been initialized, force a call out to
1352 * the loader to get buffers so we have a drawable size for the initial
1353 * viewport. */
1354 if (!brw->ctx.ViewportInitialized)
1355 intel_prepare_render(brw);
1356
1357 _mesa_make_current(ctx, fb, readFb);
1358 } else {
1359 GET_CURRENT_CONTEXT(ctx);
1360 _mesa_glthread_finish(ctx);
1361 _mesa_make_current(NULL, NULL, NULL);
1362 }
1363
1364 return true;
1365 }
1366
1367 void
1368 intel_resolve_for_dri2_flush(struct brw_context *brw,
1369 __DRIdrawable *drawable)
1370 {
1371 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1372
1373 if (devinfo->gen < 6) {
1374 /* MSAA and fast color clear are not supported, so don't waste time
1375 * checking whether a resolve is needed.
1376 */
1377 return;
1378 }
1379
1380 struct gl_framebuffer *fb = drawable->driverPrivate;
1381 struct intel_renderbuffer *rb;
1382
1383 /* Usually, only the back buffer will need to be downsampled. However,
1384 * the front buffer will also need it if the user has rendered into it.
1385 */
1386 static const gl_buffer_index buffers[2] = {
1387 BUFFER_BACK_LEFT,
1388 BUFFER_FRONT_LEFT,
1389 };
1390
1391 for (int i = 0; i < 2; ++i) {
1392 rb = intel_get_renderbuffer(fb, buffers[i]);
1393 if (rb == NULL || rb->mt == NULL)
1394 continue;
1395 if (rb->mt->surf.samples == 1) {
1396 assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1397 rb->layer_count == 1);
1398 intel_miptree_prepare_external(brw, rb->mt);
1399 } else {
1400 intel_renderbuffer_downsample(brw, rb);
1401
1402 /* Call prepare_external on the single-sample miptree to do any
1403 * needed resolves prior to handing it off to the window system.
1404 * This is needed in the case that rb->singlesample_mt is Y-tiled
1405 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
1406 * this case, the MSAA resolve above will write compressed data into
1407 * rb->singlesample_mt.
1408 *
1409 * TODO: Some day, if we decide to care about the tiny performance
1410 * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1411 * we could detect this case and just allocate the single-sampled
1412 * miptree without aux. However, that would be a lot of plumbing and
1413 * this is a rather exotic case so it's not really worth it.
1414 */
1415 intel_miptree_prepare_external(brw, rb->singlesample_mt);
1416 }
1417 }
1418 }
1419
1420 static unsigned
1421 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1422 {
1423 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1424 }
1425
1426 static void
1427 intel_query_dri2_buffers(struct brw_context *brw,
1428 __DRIdrawable *drawable,
1429 __DRIbuffer **buffers,
1430 int *count);
1431
1432 static void
1433 intel_process_dri2_buffer(struct brw_context *brw,
1434 __DRIdrawable *drawable,
1435 __DRIbuffer *buffer,
1436 struct intel_renderbuffer *rb,
1437 const char *buffer_name);
1438
1439 static void
1440 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1441
1442 static void
1443 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1444 {
1445 struct gl_framebuffer *fb = drawable->driverPrivate;
1446 struct intel_renderbuffer *rb;
1447 __DRIbuffer *buffers = NULL;
1448 int count;
1449 const char *region_name;
1450
1451 /* Set this up front, so that in case our buffers get invalidated
1452 * while we're getting new buffers, we don't clobber the stamp and
1453 * thus ignore the invalidate. */
1454 drawable->lastStamp = drawable->dri2.stamp;
1455
1456 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1457 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1458
1459 intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1460
1461 if (buffers == NULL)
1462 return;
1463
1464 for (int i = 0; i < count; i++) {
1465 switch (buffers[i].attachment) {
1466 case __DRI_BUFFER_FRONT_LEFT:
1467 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1468 region_name = "dri2 front buffer";
1469 break;
1470
1471 case __DRI_BUFFER_FAKE_FRONT_LEFT:
1472 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1473 region_name = "dri2 fake front buffer";
1474 break;
1475
1476 case __DRI_BUFFER_BACK_LEFT:
1477 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1478 region_name = "dri2 back buffer";
1479 break;
1480
1481 case __DRI_BUFFER_DEPTH:
1482 case __DRI_BUFFER_HIZ:
1483 case __DRI_BUFFER_DEPTH_STENCIL:
1484 case __DRI_BUFFER_STENCIL:
1485 case __DRI_BUFFER_ACCUM:
1486 default:
1487 fprintf(stderr,
1488 "unhandled buffer attach event, attachment type %d\n",
1489 buffers[i].attachment);
1490 return;
1491 }
1492
1493 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1494 }
1495
1496 }
1497
1498 void
1499 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1500 {
1501 struct brw_context *brw = context->driverPrivate;
1502 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1503
1504 /* Set this up front, so that in case our buffers get invalidated
1505 * while we're getting new buffers, we don't clobber the stamp and
1506 * thus ignore the invalidate. */
1507 drawable->lastStamp = drawable->dri2.stamp;
1508
1509 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1510 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1511
1512 if (dri_screen->image.loader)
1513 intel_update_image_buffers(brw, drawable);
1514 else
1515 intel_update_dri2_buffers(brw, drawable);
1516
1517 driUpdateFramebufferSize(&brw->ctx, drawable);
1518 }
1519
1520 /**
1521 * intel_prepare_render should be called anywhere that current read/drawbuffer
1522 * state is required.
1523 */
1524 void
1525 intel_prepare_render(struct brw_context *brw)
1526 {
1527 struct gl_context *ctx = &brw->ctx;
1528 __DRIcontext *driContext = brw->driContext;
1529 __DRIdrawable *drawable;
1530
1531 drawable = driContext->driDrawablePriv;
1532 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1533 if (drawable->lastStamp != drawable->dri2.stamp)
1534 intel_update_renderbuffers(driContext, drawable);
1535 driContext->dri2.draw_stamp = drawable->dri2.stamp;
1536 }
1537
1538 drawable = driContext->driReadablePriv;
1539 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1540 if (drawable->lastStamp != drawable->dri2.stamp)
1541 intel_update_renderbuffers(driContext, drawable);
1542 driContext->dri2.read_stamp = drawable->dri2.stamp;
1543 }
1544
1545 /* If we're currently rendering to the front buffer, the rendering
1546 * that will happen next will probably dirty the front buffer. So
1547 * mark it as dirty here.
1548 */
1549 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer) &&
1550 ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) {
1551 brw->front_buffer_dirty = true;
1552 }
1553
1554 if (brw->is_shared_buffer_bound) {
1555 /* Subsequent rendering will probably dirty the shared buffer. */
1556 brw->is_shared_buffer_dirty = true;
1557 }
1558 }
1559
1560 /**
1561 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1562 *
1563 * To determine which DRI buffers to request, examine the renderbuffers
1564 * attached to the drawable's framebuffer. Then request the buffers with
1565 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1566 *
1567 * This is called from intel_update_renderbuffers().
1568 *
1569 * \param drawable Drawable whose buffers are queried.
1570 * \param buffers [out] List of buffers returned by DRI2 query.
1571 * \param buffer_count [out] Number of buffers returned.
1572 *
1573 * \see intel_update_renderbuffers()
1574 * \see DRI2GetBuffers()
1575 * \see DRI2GetBuffersWithFormat()
1576 */
1577 static void
1578 intel_query_dri2_buffers(struct brw_context *brw,
1579 __DRIdrawable *drawable,
1580 __DRIbuffer **buffers,
1581 int *buffer_count)
1582 {
1583 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1584 struct gl_framebuffer *fb = drawable->driverPrivate;
1585 int i = 0;
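/* The DRI2 loader consumes attachments[] as (attachment, format) pairs,
 * e.g. { __DRI_BUFFER_BACK_LEFT, 32 } for a 32 bpp back buffer.  Each
 * request therefore fills two slots, which is why the count handed to
 * getBuffersWithFormat() below is i / 2.
 */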
1586 unsigned attachments[8];
1587
1588 struct intel_renderbuffer *front_rb;
1589 struct intel_renderbuffer *back_rb;
1590
1591 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1592 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1593
1594 memset(attachments, 0, sizeof(attachments));
1595 if ((_mesa_is_front_buffer_drawing(fb) ||
1596 _mesa_is_front_buffer_reading(fb) ||
1597 !back_rb) && front_rb) {
1598 /* If a fake front buffer is in use, then querying for
1599 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1600 * the real front buffer to the fake front buffer. So before doing the
1601 * query, we need to make sure all the pending drawing has landed in the
1602 * real front buffer.
1603 */
1604 intel_batchbuffer_flush(brw);
1605 intel_flush_front(&brw->ctx);
1606
1607 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1608 attachments[i++] = intel_bits_per_pixel(front_rb);
1609 } else if (front_rb && brw->front_buffer_dirty) {
1610 /* We have pending front buffer rendering, but we aren't querying for a
1611 * front buffer. If the front buffer we have is a fake front buffer,
1612 * the X server is going to throw it away when it processes the query.
1613 * So before doing the query, make sure all the pending drawing has
1614 * landed in the real front buffer.
1615 */
1616 intel_batchbuffer_flush(brw);
1617 intel_flush_front(&brw->ctx);
1618 }
1619
1620 if (back_rb) {
1621 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1622 attachments[i++] = intel_bits_per_pixel(back_rb);
1623 }
1624
1625 assert(i <= ARRAY_SIZE(attachments));
1626
1627 *buffers =
1628 dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1629 &drawable->w,
1630 &drawable->h,
1631 attachments, i / 2,
1632 buffer_count,
1633 drawable->loaderPrivate);
1634 }
1635
1636 /**
1637 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1638 *
1639 * This is called from intel_update_renderbuffers().
1640 *
1641 * \par Note:
1642 * DRI buffers whose attachment point is DRI2BufferStencil or
1643 * DRI2BufferDepthStencil are handled as special cases.
1644 *
1645 * \param buffer_name is a human-readable name, such as "dri2 front buffer",
1646 * that is passed to brw_bo_gem_create_from_name().
1647 *
1648 * \see intel_update_renderbuffers()
1649 */
1650 static void
1651 intel_process_dri2_buffer(struct brw_context *brw,
1652 __DRIdrawable *drawable,
1653 __DRIbuffer *buffer,
1654 struct intel_renderbuffer *rb,
1655 const char *buffer_name)
1656 {
1657 struct gl_framebuffer *fb = drawable->driverPrivate;
1658 struct brw_bo *bo;
1659
1660 if (!rb)
1661 return;
1662
1663 unsigned num_samples = rb->Base.Base.NumSamples;
1664
1665 /* We try to avoid closing and reopening the same BO name, because the first
1666 * use of a mapping of the buffer involves a bunch of page faulting which is
1667 * moderately expensive.
1668 */
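/* Window-system buffers are always single-sampled, so for a multisampled
 * renderbuffer the loader-provided BO backs singlesample_mt (the resolve
 * target) rather than the multisample mt.
 */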
1669 struct intel_mipmap_tree *last_mt;
1670 if (num_samples == 0)
1671 last_mt = rb->mt;
1672 else
1673 last_mt = rb->singlesample_mt;
1674
1675 uint32_t old_name = 0;
1676 if (last_mt) {
1677 /* The bo already has a name because the miptree was created by a
1678 * previous call to intel_process_dri2_buffer(). If a bo already has a
1679 * name, then brw_bo_flink() is a low-cost getter. It does not
1680 * create a new name.
1681 */
1682 brw_bo_flink(last_mt->bo, &old_name);
1683 }
1684
1685 if (old_name == buffer->name)
1686 return;
1687
1688 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1689 fprintf(stderr,
1690 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1691 buffer->name, buffer->attachment,
1692 buffer->cpp, buffer->pitch);
1693 }
1694
1695 bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1696 buffer->name);
1697 if (!bo) {
1698 fprintf(stderr,
1699 "Failed to open BO for returned DRI2 buffer "
1700 "(%dx%d, %s, named %d).\n"
1701 "This is likely a bug in the X Server that will lead to a "
1702 "crash soon.\n",
1703 drawable->w, drawable->h, buffer_name, buffer->name);
1704 return;
1705 }
1706
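/* The X server typically chose the buffer's tiling when it allocated it;
 * query the kernel for it so the miptree below can be created with the
 * matching ISL tiling.
 */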
1707 uint32_t tiling, swizzle;
1708 brw_bo_get_tiling(bo, &tiling, &swizzle);
1709
1710 struct intel_mipmap_tree *mt =
1711 intel_miptree_create_for_bo(brw,
1712 bo,
1713 intel_rb_format(rb),
1714 0,
1715 drawable->w,
1716 drawable->h,
1717 1,
1718 buffer->pitch,
1719 isl_tiling_from_i915_tiling(tiling),
1720 MIPTREE_CREATE_DEFAULT);
1721 if (!mt) {
1722 brw_bo_unreference(bo);
1723 return;
1724 }
1725
1726 /* We got this BO from X11. We can't assume that we have coherent texture
1727 * access because X may suddenly decide to use it for scan-out which would
1728 * destroy coherency.
1729 */
1730 bo->cache_coherent = false;
1731
1732 if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1733 drawable->w, drawable->h,
1734 buffer->pitch)) {
1735 brw_bo_unreference(bo);
1736 intel_miptree_release(&mt);
1737 return;
1738 }
1739
1740 if (_mesa_is_front_buffer_drawing(fb) &&
1741 (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1742 buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1743 rb->Base.Base.NumSamples > 1) {
1744 intel_renderbuffer_upsample(brw, rb);
1745 }
1746
1747 assert(rb->mt);
1748
1749 brw_bo_unreference(bo);
1750 }
1751
1752 /**
1753 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1754 *
1755 * To determine which DRI buffers to request, examine the renderbuffers
1756 * attached to the drawable's framebuffer. Then request the buffers from
1757 * the image loader.
1758 *
1759 * This is called from intel_update_renderbuffers().
1760 *
1761 * \param drawable Drawable whose buffers are queried.
1762 * \param buffers [out] List of buffers returned by the image loader.
1763 * \param buffer_count [out] Number of buffers returned.
1764 *
1765 * \see intel_update_renderbuffers()
1766 */
1767
1768 static void
1769 intel_update_image_buffer(struct brw_context *intel,
1770 __DRIdrawable *drawable,
1771 struct intel_renderbuffer *rb,
1772 __DRIimage *buffer,
1773 enum __DRIimageBufferMask buffer_type)
1774 {
1775 struct gl_framebuffer *fb = drawable->driverPrivate;
1776
1777 if (!rb || !buffer->bo)
1778 return;
1779
1780 unsigned num_samples = rb->Base.Base.NumSamples;
1781
1782 /* Check and see if we're already bound to the right
1783 * buffer object
1784 */
1785 struct intel_mipmap_tree *last_mt;
1786 if (num_samples == 0)
1787 last_mt = rb->mt;
1788 else
1789 last_mt = rb->singlesample_mt;
1790
1791 if (last_mt && last_mt->bo == buffer->bo) {
1792 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1793 intel_miptree_make_shareable(intel, last_mt);
1794 }
1795 return;
1796 }
1797
1798 /* Only allow internal compression if samples == 0. For multisampled
1799 * window system buffers, the only thing the single-sampled buffer is used
1800 * for is as a resolve target. If we do any compression beyond what is
1801 * supported by the window system, we will just have to resolve so it's
1802 * probably better to just not bother.
1803 */
1804 const bool allow_internal_aux = (num_samples == 0);
1805
1806 struct intel_mipmap_tree *mt =
1807 intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1808 intel_rb_format(rb),
1809 allow_internal_aux);
1810 if (!mt)
1811 return;
1812
1813 if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1814 buffer->width, buffer->height,
1815 buffer->pitch)) {
1816 intel_miptree_release(&mt);
1817 return;
1818 }
1819
1820 if (_mesa_is_front_buffer_drawing(fb) &&
1821 buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1822 rb->Base.Base.NumSamples > 1) {
1823 intel_renderbuffer_upsample(intel, rb);
1824 }
1825
1826 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1827 /* The compositor and the application may access this image
1828 * concurrently. The display hardware may even scanout the image while
1829 * the GPU is rendering to it. Aux surfaces cause difficulty with
1830 * concurrent access, so permanently disable aux for this miptree.
1831 *
1832 * Perhaps we could improve overall application performance by
1833 * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1834 * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1835 * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1836 * approach to be highly dependent on the application's GL usage.
1837 *
1838 * I [chadv] expect clever disabling/reenabling to be counterproductive
1839 * in the use cases I care about: applications that render nearly
1840 * realtime handwriting to the surface while possibly undergoing
1841 * simultaneous scanout as a display plane. The app requires low
1842 * render latency. Even though the app spends most of its time in
1843 * shared-buffer mode, it also frequently transitions between
1844 * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1845 * mode. Visual stutter during the transitions should be avoided.
1846 *
1847 * In this case, I [chadv] believe reducing the GPU workload at
1848 * shared-buffer/double-buffer transitions would offer a smoother app
1849 * experience than any savings due to aux compression. But I've
1850 * collected no data to prove my theory.
1851 */
1852 intel_miptree_make_shareable(intel, mt);
1853 }
1854 }
1855
1856 static void
1857 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1858 {
1859 struct gl_framebuffer *fb = drawable->driverPrivate;
1860 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1861 struct intel_renderbuffer *front_rb;
1862 struct intel_renderbuffer *back_rb;
1863 struct __DRIimageList images;
1864 mesa_format format;
1865 uint32_t buffer_mask = 0;
1866 int ret;
1867
1868 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1869 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1870
1871 if (back_rb)
1872 format = intel_rb_format(back_rb);
1873 else if (front_rb)
1874 format = intel_rb_format(front_rb);
1875 else
1876 return;
1877
1878 if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1879 _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1880 buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1881 }
1882
1883 if (back_rb)
1884 buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1885
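/* Ask the image loader for exactly the buffers requested in buffer_mask;
 * on success it reports which buffers it actually provided in
 * images.image_mask.
 */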
1886 ret = dri_screen->image.loader->getBuffers(drawable,
1887 driGLFormatToImageFormat(format),
1888 &drawable->dri2.stamp,
1889 drawable->loaderPrivate,
1890 buffer_mask,
1891 &images);
1892 if (!ret)
1893 return;
1894
1895 if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1896 drawable->w = images.front->width;
1897 drawable->h = images.front->height;
1898 intel_update_image_buffer(brw,
1899 drawable,
1900 front_rb,
1901 images.front,
1902 __DRI_IMAGE_BUFFER_FRONT);
1903 }
1904
1905 if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1906 drawable->w = images.back->width;
1907 drawable->h = images.back->height;
1908 intel_update_image_buffer(brw,
1909 drawable,
1910 back_rb,
1911 images.back,
1912 __DRI_IMAGE_BUFFER_BACK);
1913 }
1914
1915 if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1916 assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
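/* In shared-buffer mode the loader hands back the single shared image in
 * the `back` slot, so bind it to the back renderbuffer and remember that
 * a shared buffer is bound.
 */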
1917 drawable->w = images.back->width;
1918 drawable->h = images.back->height;
1919 intel_update_image_buffer(brw,
1920 drawable,
1921 back_rb,
1922 images.back,
1923 __DRI_IMAGE_BUFFER_SHARED);
1924 brw->is_shared_buffer_bound = true;
1925 } else {
1926 brw->is_shared_buffer_bound = false;
1927 brw->is_shared_buffer_dirty = false;
1928 }
1929 }