src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "compiler/nir/nir.h"
  35 #include "main/api_exec.h"
  36 #include "main/context.h"
  37 #include "main/fbobject.h"
  38 #include "main/extensions.h"
  39 #include "main/glthread.h"
  40 #include "main/macros.h"
  41 #include "main/points.h"
  42 #include "main/version.h"
  43 #include "main/vtxfmt.h"
  44 #include "main/texobj.h"
  45 #include "main/framebuffer.h"
  46 #include "main/stencil.h"
  47 #include "main/state.h"
  48 #include "main/spirv_extensions.h"
  49
  50 #include "vbo/vbo.h"
  51
  52 #include "drivers/common/driverfuncs.h"
  53 #include "drivers/common/meta.h"
  54 #include "utils.h"
  55
  56 #include "brw_context.h"
  57 #include "brw_defines.h"
  58 #include "brw_blorp.h"
  59 #include "brw_draw.h"
  60 #include "brw_state.h"
  61
  62 #include "intel_batchbuffer.h"
  63 #include "intel_buffer_objects.h"
  64 #include "intel_buffers.h"
  65 #include "intel_fbo.h"
  66 #include "intel_mipmap_tree.h"
  67 #include "intel_pixel.h"
  68 #include "intel_image.h"
  69 #include "intel_tex.h"
  70 #include "intel_tex_obj.h"
  71
  72 #include "swrast_setup/swrast_setup.h"
  73 #include "tnl/tnl.h"
  74 #include "tnl/t_pipeline.h"
  75 #include "util/ralloc.h"
  76 #include "util/debug.h"
  77 #include "util/disk_cache.h"
  78 #include "util/u_memory.h"
  79 #include "isl/isl.h"
  80
  81 #include "common/gen_defines.h"
  82
  83 #include "compiler/spirv/nir_spirv.h"
  84 /***************************************
  85  * Mesa's Driver Functions
  86  ***************************************/
  87
/* Vendor name; intel_get_string() returns this for GL_VENDOR. */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
  89
  90 static const char *
  91 get_bsw_model(const struct intel_screen *screen)
  92 {
  93    switch (screen->eu_total) {
  94    case 16:
  95       return "405";
  96    case 12:
  97       return "400";
  98    default:
  99       return "   ";
 100    }
 101 }
 102
 103 const char *
 104 brw_get_renderer_string(const struct intel_screen *screen)
 105 {
 106    static char buf[128];
 107    const char *name = gen_get_device_name(screen->deviceID);
 108
 109    if (!name)
 110       name = "Intel Unknown";
 111
 112    snprintf(buf, sizeof(buf), "Mesa DRI %s", name);
 113
 114    /* Braswell branding is funny, so we have to fix it up here */
 115    if (screen->deviceID == 0x22B1) {
 116       char *needle = strstr(buf, "XXX");
 117       if (needle)
 118          memcpy(needle, get_bsw_model(screen), 3);
 119    }
 120
 121    return buf;
 122 }
 123
 124 static const GLubyte *
 125 intel_get_string(struct gl_context * ctx, GLenum name)
 126 {
 127    const struct brw_context *const brw = brw_context(ctx);
 128
 129    switch (name) {
 130    case GL_VENDOR:
 131       return (GLubyte *) brw_vendor_string;
 132
 133    case GL_RENDERER:
 134       return
 135          (GLubyte *) brw_get_renderer_string(brw->screen);
 136
 137    default:
 138       return NULL;
 139    }
 140 }
 141
 142 static void
 143 brw_set_background_context(struct gl_context *ctx,
 144                            struct util_queue_monitoring *queue_info)
 145 {
 146    struct brw_context *brw = brw_context(ctx);
 147    __DRIcontext *driContext = brw->driContext;
 148    __DRIscreen *driScreen = driContext->driScreenPriv;
 149    const __DRIbackgroundCallableExtension *backgroundCallable =
 150       driScreen->dri2.backgroundCallable;
 151
 152    /* Note: Mesa will only call this function if we've called
 153     * _mesa_enable_multithreading().  We only do that if the loader exposed
 154     * the __DRI_BACKGROUND_CALLABLE extension.  So we know that
 155     * backgroundCallable is not NULL.
 156     */
 157    backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
 158 }
 159
 160 static void
 161 intel_viewport(struct gl_context *ctx)
 162 {
 163    struct brw_context *brw = brw_context(ctx);
 164    __DRIcontext *driContext = brw->driContext;
 165
 166    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 167       if (driContext->driDrawablePriv)
 168          dri2InvalidateDrawable(driContext->driDrawablePriv);
 169       if (driContext->driReadablePriv)
 170          dri2InvalidateDrawable(driContext->driReadablePriv);
 171    }
 172 }
 173
 174 static void
 175 intel_update_framebuffer(struct gl_context *ctx,
 176                          struct gl_framebuffer *fb)
 177 {
 178    struct brw_context *brw = brw_context(ctx);
 179
 180    /* Quantize the derived default number of samples
 181     */
 182    fb->DefaultGeometry._NumSamples =
 183       intel_quantize_num_samples(brw->screen,
 184                                  fb->DefaultGeometry.NumSamples);
 185 }
 186
 187 static void
 188 intel_update_state(struct gl_context * ctx)
 189 {
 190    GLuint new_state = ctx->NewState;
 191    struct brw_context *brw = brw_context(ctx);
 192
 193    if (ctx->swrast_context)
 194       _swrast_InvalidateState(ctx, new_state);
 195
 196    brw->NewGLState |= new_state;
 197
 198    if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
 199       _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
 200
 201    if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
 202       brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
 203       brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
 204       brw->stencil_write_enabled =
 205          _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
 206    }
 207
 208    if (new_state & _NEW_POLYGON)
 209       brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
 210
 211    if (new_state & _NEW_BUFFERS) {
 212       intel_update_framebuffer(ctx, ctx->DrawBuffer);
 213       if (ctx->DrawBuffer != ctx->ReadBuffer)
 214          intel_update_framebuffer(ctx, ctx->ReadBuffer);
 215    }
 216 }
 217
/* Select the loader's flushFrontBuffer callback for a DRI screen: the image
 * loader's when (screen)->image.loader is set, otherwise the DRI2 loader's.
 *
 * The argument is evaluated more than once, so pass a side-effect-free
 * expression.  When image.loader is NULL, dri2.loader is dereferenced
 * without a check.
 */
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 219
 220 static void
 221 intel_flush_front(struct gl_context *ctx)
 222 {
 223    struct brw_context *brw = brw_context(ctx);
 224    __DRIcontext *driContext = brw->driContext;
 225    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 226    __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
 227
 228    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 229       if (flushFront(dri_screen) && driDrawable &&
 230           driDrawable->loaderPrivate) {
 231
 232          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 233           *
 234           * This potentially resolves both front and back buffer. It
 235           * is unnecessary to resolve the back, but harms nothing except
 236           * performance. And no one cares about front-buffer render
 237           * performance.
 238           */
 239          intel_resolve_for_dri2_flush(brw, driDrawable);
 240          intel_batchbuffer_flush(brw);
 241
 242          flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
 243
 244          /* We set the dirty bit in intel_prepare_render() if we're
 245           * front buffer rendering once we get there.
 246           */
 247          brw->front_buffer_dirty = false;
 248       }
 249    }
 250 }
 251
 252 static void
 253 brw_display_shared_buffer(struct brw_context *brw)
 254 {
 255    __DRIcontext *dri_context = brw->driContext;
 256    __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
 257    __DRIscreen *dri_screen = brw->screen->driScrnPriv;
 258    int fence_fd = -1;
 259
 260    if (!brw->is_shared_buffer_bound)
 261       return;
 262
 263    if (!brw->is_shared_buffer_dirty)
 264       return;
 265
 266    if (brw->screen->has_exec_fence) {
 267       /* This function is always called during a flush operation, so there is
 268        * no need to flush again here. But we want to provide a fence_fd to the
 269        * loader, and a redundant flush is the easiest way to acquire one.
 270        */
 271       if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
 272          return;
 273    }
 274
 275    dri_screen->mutableRenderBuffer.loader
 276       ->displaySharedBuffer(dri_drawable, fence_fd,
 277                             dri_drawable->loaderPrivate);
 278    brw->is_shared_buffer_dirty = false;
 279 }
 280
 281 static void
 282 intel_glFlush(struct gl_context *ctx)
 283 {
 284    struct brw_context *brw = brw_context(ctx);
 285
 286    intel_batchbuffer_flush(brw);
 287    intel_flush_front(ctx);
 288    brw_display_shared_buffer(brw);
 289    brw->need_flush_throttle = true;
 290 }
 291
 292 static void
 293 intel_glEnable(struct gl_context *ctx, GLenum cap, GLboolean state)
 294 {
 295    struct brw_context *brw = brw_context(ctx);
 296
 297    switch (cap) {
 298    case GL_BLACKHOLE_RENDER_INTEL:
 299       brw->frontend_noop = state;
 300       intel_batchbuffer_flush(brw);
 301       intel_batchbuffer_maybe_noop(brw);
 302       /* Because we started previous batches with a potential
 303        * MI_BATCH_BUFFER_END if NOOP was enabled, that means that anything
 304        * that was ever emitted after that never made it to the HW. So when the
 305        * blackhole state changes from NOOP->!NOOP reupload the entire state.
 306        */
 307       if (!brw->frontend_noop) {
 308          brw->NewGLState = ~0u;
 309          brw->ctx.NewDriverState = ~0ull;
 310       }
 311       break;
 312    default:
 313       break;
 314    }
 315 }
 316
 317 static void
 318 intel_finish(struct gl_context * ctx)
 319 {
 320    struct brw_context *brw = brw_context(ctx);
 321
 322    intel_glFlush(ctx);
 323
 324    if (brw->batch.last_bo)
 325       brw_bo_wait_rendering(brw->batch.last_bo);
 326 }
 327
/**
 * Populate the device-driver function table for a context.
 *
 * Starts from Mesa's defaults (_mesa_init_driver_functions) and then installs
 * the i965 hooks.  Several hooks are chosen by hardware generation
 * (devinfo->gen / devinfo->is_haswell) or by screen capabilities.
 *
 * \param brw        context whose screen and DRI context are consulted.
 * \param functions  table to fill in.
 */
static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* Must come first: everything below overrides entries it sets. */
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   /* Hooks implemented in this file. */
   functions->Enable = intel_glEnable;
   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   /* Hooks installed by other parts of the driver. */
   brw_init_draw_functions(functions);
   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   brw_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   /* Query objects: common hooks first, then a generation-specific set
    * (Haswell and gen8+, gen6+, or older).
    */
   brw_init_common_queryobj_functions(functions);
   if (devinfo->gen >= 8 || devinfo->is_haswell)
      hsw_init_queryobj_functions(functions);
   else if (devinfo->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   brw_init_conditional_render_functions(functions);

   functions->GenerateMipmap = brw_generate_mipmap;

   functions->QueryInternalFormat = brw_query_internal_format;

   /* Transform feedback.  Note that only the two non-hsw paths install
    * GetTransformFeedbackVertexCount.
    */
   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   if (can_do_mi_math_and_lrr(brw->screen)) {
      functions->BeginTransformFeedback = hsw_begin_transform_feedback;
      functions->EndTransformFeedback = hsw_end_transform_feedback;
      functions->PauseTransformFeedback = hsw_pause_transform_feedback;
      functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
   } else if (devinfo->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
      functions->PauseTransformFeedback = brw_pause_transform_feedback;
      functions->ResumeTransformFeedback = brw_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   }

   if (devinfo->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;

   /* GL_ARB_get_program_binary */
   brw_program_binary_init(brw->screen->deviceID);
   functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
   functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
   functions->ProgramBinaryDeserializeDriverBlob =
      brw_deserialize_program_binary;

   /* The shader-cache serialization hook is only installed when the screen
    * has a disk cache.
    */
   if (brw->screen->disk_cache) {
      functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
   }

   functions->SetBackgroundContext = brw_set_background_context;
}
 418
 419 static void
 420 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
 421 {
 422    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 423    struct gl_context *ctx = &brw->ctx;
 424
 425    /* The following SPIR-V capabilities are only supported on gen7+. In theory
 426     * you should enable the extension only on gen7+, but just in case let's
 427     * assert it.
 428     */
 429    assert(devinfo->gen >= 7);
 430
 431    ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
 432    ctx->Const.SpirVCapabilities.draw_parameters = true;
 433    ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
 434    ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
 435    ctx->Const.SpirVCapabilities.image_write_without_format = true;
 436    ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
 437    ctx->Const.SpirVCapabilities.tessellation = true;
 438    ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
 439    ctx->Const.SpirVCapabilities.variable_pointers = true;
 440    ctx->Const.SpirVCapabilities.integer_functions2 = devinfo->gen >= 8;
 441 }
 442
/**
 * Fill in the implementation limits and behaviour flags in ctx->Const for
 * this device.
 *
 * Most values are selected by hardware generation (devinfo->gen); a few also
 * depend on the compiler configuration or on the "clamp_max_samples" driver
 * option.  Per-stage limits are only written for the shader stages listed in
 * stage_exists below.
 */
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* Shader stages that get per-stage limits below.  The compute entry reads
    * ctx->Const.MaxComputeWorkGroupSize[0] as it stands when this function
    * runs.
    */
   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
      [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (_mesa_is_desktop_gl(ctx) &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
   };

   /* Count of existing stages; used to scale the "combined" limits. */
   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   /* Per-stage sampler limit: BRW_MAX_TEX_UNIT on Haswell and gen8+,
    * otherwise 16.
    */
   unsigned max_samplers =
      devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   /* The timestamp register we can read for glGetTimestamp() is
    * sometimes only 32 bits, before scaling to nanoseconds (depending
    * on kernel).
    *
    * Once scaled to nanoseconds the timestamp would roll over at a
    * non-power-of-two, so an application couldn't use
    * GL_QUERY_COUNTER_BITS to handle rollover correctly.  Instead, we
    * report 36 bits and truncate at that (rolling over 5 times as
    * often as the HW counter), and when the 32-bit counter rolls
    * over, it happens to also be at a rollover in the reported value
    * from near (1<<36) to 0.
    *
    * The low 32 bits roll over in ~343 seconds.  Our 36-bit result
    * rolls over every ~69 seconds.
    */
   ctx->Const.QueryCounterBits.Timestamp = 36;

   /* Texture limits. */
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   if (devinfo->gen >= 7) {
      ctx->Const.MaxRenderbufferSize = 16384;
      ctx->Const.MaxTextureSize = 16384;
      ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
   } else {
      ctx->Const.MaxRenderbufferSize = 8192;
      ctx->Const.MaxTextureSize = 8192;
      ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   }
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.MaxTextureLodBias = 15.0;
   ctx->Const.StripTextureBorder = true;
   /* Texture gather limits; left untouched below gen6. */
   if (devinfo->gen >= 7) {
      ctx->Const.MaxProgramTextureGatherComponents = 4;
      ctx->Const.MinProgramTextureGatherOffset = -32;
      ctx->Const.MaxProgramTextureGatherOffset = 31;
   } else if (devinfo->gen == 6) {
      ctx->Const.MaxProgramTextureGatherComponents = 1;
      ctx->Const.MinProgramTextureGatherOffset = -8;
      ctx->Const.MaxProgramTextureGatherOffset = 7;
   }

   ctx->Const.MaxUniformBlockSize = 65536;

   /* Per-stage resource limits, for existing stages only. */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      /* Default-block components plus the components (block size in bytes
       * divided by 4) of every uniform block.
       */
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      /* Image uniforms are only advertised for scalar-backend stages. */
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   /* Combined limits: the per-stage limit times the number of stages. */
   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
      !can_do_mi_math_and_lrr(brw->screen);

   /* Multisample limits.  A negative clamp_max_samples option means "no
    * clamp": take the first entry of the supported-modes list.
    */
   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       *
       * NOTE(review): taking the first match assumes msaa_modes is sorted
       * in descending order; the loop stops at the first 0 entry.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* Line width limits. */
   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (devinfo->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   /* Point size limits. */
   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   /* MaxClipPlanes is left untouched on other hardware. */
   if (devinfo->gen >= 5 || devinfo->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.GLSLFragCoordIsSysVal = true;
   ctx->Const.GLSLFrontFacingIsSysVal = true;
   ctx->Const.GLSLTessLevelsAsInputs = true;
   ctx->Const.PrimitiveRestartForPatches = true;

   /* Vertex program native limits.  MaxEnvParams is clamped so that it never
    * exceeds MaxNativeParameters.
    */
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   /* Fragment program native limits, with the same MaxEnvParams clamp. */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   /* The same integer precision is reported for vertex shaders. */
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the vertex order is handled there, which
    * affects the provoking vertex decision. Always use the last vertex
    * convention for quad primitives, which works as expected for now.
    */
   if (devinfo->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    *
    * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
    * restriction: the start of the buffer needs to be 32B aligned.
    */
   ctx->Const.UniformBufferOffsetAlignment = 32;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   /* Varying and per-stage input/output component limits (gen6+ only). */
   if (devinfo->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
         compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->screen->compiler->glsl_compiler_options[i];
   }

   if (devinfo->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array, OES_viewport_array */
   if (devinfo->gen >= 6) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 8;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (devinfo->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;

   /* OES_primitive_bounding_box */
   ctx->Const.NoPrimitiveBoundingBoxOutput = true;

   /* TODO: We should be able to use STD430 packing by default on all hardware
    * but some piglit tests [1] currently fail on SNB when this is enabled.
    * The problem is that the message we use for uniform pulls in the vec4
    * back-end on SNB is the OWORD block load instruction, which
    * takes its offset in units of OWORDS (16 bytes).  On IVB+, we use the
    * sampler which doesn't have these restrictions.
    *
    * In the scalar back-end, we use the sampler for dynamic uniform loads and
    * pull an entire cache line at a time for constant offset loads both of
    * which support almost any alignment.
    *
    * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
    */
   if (devinfo->gen >= 7)
      ctx->Const.UseSTD430AsDefaultPacking = true;

   /* Only set when this is not a debug context. */
   if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
      ctx->Const.AllowMappedBuffersDuringExecution = true;

   /* GL_ARB_get_program_binary */
   ctx->Const.NumProgramBinaryFormats = 1;
}
 803
 804 static void
 805 brw_initialize_cs_context_constants(struct brw_context *brw)
 806 {
 807    struct gl_context *ctx = &brw->ctx;
 808    const struct intel_screen *screen = brw->screen;
 809    struct gen_device_info *devinfo = &brw->screen->devinfo;
 810
 811    /* FINISHME: Do this for all platforms that the kernel supports */
 812    if (devinfo->is_cherryview &&
 813        screen->subslice_total > 0 && screen->eu_total > 0) {
 814       /* Logical CS threads = EUs per subslice * 7 threads per EU */
 815       uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
 816
 817       /* Fuse configurations may give more threads than expected, never less. */
 818       if (max_cs_threads > devinfo->max_cs_threads)
 819          devinfo->max_cs_threads = max_cs_threads;
 820    }
 821
 822    /* Maximum number of scalar compute shader invocations that can be run in
 823     * parallel in the same subslice assuming SIMD32 dispatch.
 824     *
 825     * We don't advertise more than 64 threads, because we are limited to 64 by
 826     * our usage of thread_width_max in the gpgpu walker command. This only
 827     * currently impacts Haswell, which otherwise might be able to advertise 70
 828     * threads. With SIMD32 and 64 threads, Haswell still provides twice the
 829     * required the number of invocation needed for ARB_compute_shader.
 830     */
 831    const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
 832    const uint32_t max_invocations = 32 * max_threads;
 833    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
 834    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
 835    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
 836    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
 837    ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
 838
 839    /* Constants used for ARB_compute_variable_group_size.  The compiler will
 840     * use the maximum to decide which SIMDs can be used.  If we top this like
 841     * max_invocations, that would prevent SIMD8 / SIMD16 to be considered.
 842     *
 843     * TODO: To avoid the trade off above between having the lower maximum
 844     * vs. always using SIMD32, keep all three shader variants (for each SIMD)
 845     * and select a suitable one at dispatch time.
 846     */
 847    if (devinfo->gen >= 7) {
 848       const uint32_t max_var_invocations =
 849          (max_threads >= 64 ? 8 : (max_threads >= 32 ? 16 : 32)) * max_threads;
 850       assert(max_var_invocations >= 512);
 851       ctx->Const.MaxComputeVariableGroupSize[0] = max_var_invocations;
 852       ctx->Const.MaxComputeVariableGroupSize[1] = max_var_invocations;
 853       ctx->Const.MaxComputeVariableGroupSize[2] = max_var_invocations;
 854       ctx->Const.MaxComputeVariableGroupInvocations = max_var_invocations;
 855    }
 856 }
 857
 858 /**
 859  * Process driconf (drirc) options, setting appropriate context flags.
 860  *
 861  * intelInitExtensions still pokes at optionCache directly, in order to
 862  * avoid advertising various extensions.  No flags are set, so it makes
 863  * sense to continue doing that there.
 864  */
 865 static void
 866 brw_process_driconf_options(struct brw_context *brw)
 867 {
 868    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 869    struct gl_context *ctx = &brw->ctx;
 870
 871    driOptionCache *options = &brw->optionCache;
 872    driParseConfigFiles(options, &brw->screen->optionCache,
 873                        brw->driContext->driScreenPriv->myNum,
 874                        "i965", NULL, NULL, 0);
 875
 876    if (INTEL_DEBUG & DEBUG_NO_HIZ) {
 877        brw->has_hiz = false;
 878        /* On gen6, you can only do separate stencil with HIZ. */
 879        if (devinfo->gen == 6)
 880           brw->has_separate_stencil = false;
 881    }
 882
 883    if (driQueryOptionb(options, "mesa_no_error"))
 884       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
 885
 886    if (driQueryOptionb(options, "always_flush_batch")) {
 887       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 888       brw->always_flush_batch = true;
 889    }
 890
 891    if (driQueryOptionb(options, "always_flush_cache")) {
 892       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 893       brw->always_flush_cache = true;
 894    }
 895
 896    if (driQueryOptionb(options, "disable_throttling")) {
 897       fprintf(stderr, "disabling flush throttling\n");
 898       brw->disable_throttling = true;
 899    }
 900
 901    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 902
 903    if (driQueryOptionb(&brw->optionCache, "precise_trig"))
 904       brw->screen->compiler->precise_trig = true;
 905
 906    ctx->Const.ForceGLSLExtensionsWarn =
 907       driQueryOptionb(options, "force_glsl_extensions_warn");
 908
 909    ctx->Const.ForceGLSLVersion =
 910       driQueryOptioni(options, "force_glsl_version");
 911
 912    ctx->Const.DisableGLSLLineContinuations =
 913       driQueryOptionb(options, "disable_glsl_line_continuations");
 914
 915    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 916       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 917
 918    ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
 919       driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
 920
 921    ctx->Const.AllowHigherCompatVersion =
 922       driQueryOptionb(options, "allow_higher_compat_version");
 923
 924    ctx->Const.ForceGLSLAbsSqrt =
 925       driQueryOptionb(options, "force_glsl_abs_sqrt");
 926
 927    ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
 928
 929    brw->dual_color_blend_by_location =
 930       driQueryOptionb(options, "dual_color_blend_by_location");
 931
 932    ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
 933       driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
 934
 935    ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
 936    driComputeOptionsSha1(&brw->screen->optionCache,
 937                          ctx->Const.dri_config_options_sha1);
 938 }
 939
 940 GLboolean
 941 brwCreateContext(gl_api api,
 942                  const struct gl_config *mesaVis,
 943                  __DRIcontext *driContextPriv,
 944                  const struct __DriverContextConfig *ctx_config,
 945                  unsigned *dri_ctx_error,
 946                  void *sharedContextPrivate)
 947 {
 948    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 949    struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
 950    const struct gen_device_info *devinfo = &screen->devinfo;
 951    struct dd_function_table functions;
 952
 953    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 954     * provides us with context reset notifications.
 955     */
 956    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
 957                             __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
 958                             __DRI_CTX_FLAG_NO_ERROR;
 959
 960    if (screen->has_context_reset_notification)
 961       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 962
 963    if (ctx_config->flags & ~allowed_flags) {
 964       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 965       return false;
 966    }
 967
 968    if (ctx_config->attribute_mask &
 969        ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
 970          __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
 971       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
 972       return false;
 973    }
 974
 975    bool notify_reset =
 976       ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
 977        ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
 978
 979    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 980    if (!brw) {
 981       fprintf(stderr, "%s: failed to alloc context\n", __func__);
 982       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 983       return false;
 984    }
 985    brw->perf_ctx = gen_perf_new_context(brw);
 986
 987    driContextPriv->driverPrivate = brw;
 988    brw->driContext = driContextPriv;
 989    brw->screen = screen;
 990    brw->bufmgr = screen->bufmgr;
 991
 992    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 993    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 994
 995    brw->has_swizzling = screen->hw_has_swizzling;
 996
 997    brw->isl_dev = screen->isl_dev;
 998
 999    brw->vs.base.stage = MESA_SHADER_VERTEX;
1000    brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
1001    brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
1002    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
1003    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
1004    brw->cs.base.stage = MESA_SHADER_COMPUTE;
1005
1006    brw_init_driver_functions(brw, &functions);
1007
1008    if (notify_reset)
1009       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
1010
1011    brw_process_driconf_options(brw);
1012
1013    if (api == API_OPENGL_CORE &&
1014        driQueryOptionb(&screen->optionCache, "force_compat_profile")) {
1015       api = API_OPENGL_COMPAT;
1016    }
1017
1018    struct gl_context *ctx = &brw->ctx;
1019
1020    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
1021       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1022       fprintf(stderr, "%s: failed to init mesa context\n", __func__);
1023       intelDestroyContext(driContextPriv);
1024       return false;
1025    }
1026
1027    driContextSetFlags(ctx, ctx_config->flags);
1028
1029    /* Initialize the software rasterizer and helper modules.
1030     *
1031     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1032     * software fallbacks (which we have to support on legacy GL to do weird
1033     * glDrawPixels(), glBitmap(), and other functions).
1034     */
1035    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1036       _swrast_CreateContext(ctx);
1037    }
1038
1039    _vbo_CreateContext(ctx, true);
1040    if (ctx->swrast_context) {
1041       _tnl_CreateContext(ctx);
1042       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1043       _swsetup_CreateContext(ctx);
1044
1045       /* Configure swrast to match hardware characteristics: */
1046       _swrast_allow_pixel_fog(ctx, false);
1047       _swrast_allow_vertex_fog(ctx, true);
1048    }
1049
1050    _mesa_meta_init(ctx);
1051
1052    if (INTEL_DEBUG & DEBUG_PERF)
1053       brw->perf_debug = true;
1054
1055    brw_initialize_cs_context_constants(brw);
1056    brw_initialize_context_constants(brw);
1057
1058    ctx->Const.ResetStrategy = notify_reset
1059       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1060
1061    /* Reinitialize the context point state.  It depends on ctx->Const values. */
1062    _mesa_init_point(ctx);
1063
1064    intel_fbo_init(brw);
1065
1066    intel_batchbuffer_init(brw);
1067
1068    /* Create a new hardware context.  Using a hardware context means that
1069     * our GPU state will be saved/restored on context switch, allowing us
1070     * to assume that the GPU is in the same state we left it in.
1071     *
1072     * This is required for transform feedback buffer offsets, query objects,
1073     * and also allows us to reduce how much state we have to emit.
1074     */
1075    brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1076    if (!brw->hw_ctx && devinfo->gen >= 6) {
1077       fprintf(stderr, "Failed to create hardware context.\n");
1078       intelDestroyContext(driContextPriv);
1079       return false;
1080    }
1081
1082    if (brw->hw_ctx) {
1083       int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1084       if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1085          switch (ctx_config->priority) {
1086          case __DRI_CTX_PRIORITY_LOW:
1087             hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1088             break;
1089          case __DRI_CTX_PRIORITY_HIGH:
1090             hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1091             break;
1092          }
1093       }
1094       if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1095           brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1096          fprintf(stderr,
1097                  "Failed to set priority [%d:%d] for hardware context.\n",
1098                  ctx_config->priority, hw_priority);
1099          intelDestroyContext(driContextPriv);
1100          return false;
1101       }
1102    }
1103
1104    if (brw_init_pipe_control(brw, devinfo)) {
1105       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1106       intelDestroyContext(driContextPriv);
1107       return false;
1108    }
1109
1110    brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1111
1112    brw_init_state(brw);
1113
1114    intelInitExtensions(ctx);
1115
1116    brw_init_surface_formats(brw);
1117
1118    brw_blorp_init(brw);
1119
1120    brw->urb.size = devinfo->urb.size;
1121
1122    if (devinfo->gen == 6)
1123       brw->urb.gs_present = false;
1124
1125    brw->prim_restart.in_progress = false;
1126    brw->prim_restart.enable_cut_index = false;
1127    brw->gs.enabled = false;
1128    brw->clip.viewport_count = 1;
1129
1130    brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1131
1132    brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1133
1134    ctx->VertexProgram._MaintainTnlProgram = true;
1135    ctx->FragmentProgram._MaintainTexEnvProgram = true;
1136
1137    brw_draw_init( brw );
1138
1139    if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1140       /* Turn on some extra GL_ARB_debug_output generation. */
1141       brw->perf_debug = true;
1142    }
1143
1144    if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1145       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1146       ctx->Const.RobustAccess = GL_TRUE;
1147    }
1148
1149    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1150       brw_init_shader_time(brw);
1151
1152    _mesa_override_extensions(ctx);
1153    _mesa_compute_version(ctx);
1154
1155    /* GL_ARB_gl_spirv */
1156    if (ctx->Extensions.ARB_gl_spirv) {
1157       brw_initialize_spirv_supported_capabilities(brw);
1158
1159       if (ctx->Extensions.ARB_spirv_extensions) {
1160          /* GL_ARB_spirv_extensions */
1161          ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
1162          _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
1163                                                &ctx->Const.SpirVCapabilities);
1164       }
1165    }
1166
1167    _mesa_initialize_dispatch_tables(ctx);
1168    _mesa_initialize_vbo_vtxfmt(ctx);
1169
1170    if (ctx->Extensions.INTEL_performance_query)
1171       brw_init_performance_queries(brw);
1172
1173    brw->ctx.Cache = brw->screen->disk_cache;
1174
1175    if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1176        driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1177       /* Loader supports multithreading, and so do we. */
1178       _mesa_glthread_init(ctx);
1179    }
1180
1181    return true;
1182 }
1183
1184 void
1185 intelDestroyContext(__DRIcontext * driContextPriv)
1186 {
1187    struct brw_context *brw =
1188       (struct brw_context *) driContextPriv->driverPrivate;
1189    struct gl_context *ctx = &brw->ctx;
1190
1191    GET_CURRENT_CONTEXT(curctx);
1192
1193    if (curctx == NULL) {
1194       /* No current context, but we need one to release
1195        * renderbuffer surface when we release framebuffer.
1196        * So temporarily bind the context.
1197        */
1198       _mesa_make_current(ctx, NULL, NULL);
1199    }
1200
1201    _mesa_glthread_destroy(&brw->ctx);
1202
1203    _mesa_meta_free(&brw->ctx);
1204
1205    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1206       /* Force a report. */
1207       brw->shader_time.report_time = 0;
1208
1209       brw_collect_and_report_shader_time(brw);
1210       brw_destroy_shader_time(brw);
1211    }
1212
1213    blorp_finish(&brw->blorp);
1214
1215    brw_destroy_state(brw);
1216    brw_draw_destroy(brw);
1217
1218    brw_bo_unreference(brw->curbe.curbe_bo);
1219
1220    brw_bo_unreference(brw->vs.base.scratch_bo);
1221    brw_bo_unreference(brw->tcs.base.scratch_bo);
1222    brw_bo_unreference(brw->tes.base.scratch_bo);
1223    brw_bo_unreference(brw->gs.base.scratch_bo);
1224    brw_bo_unreference(brw->wm.base.scratch_bo);
1225
1226    brw_bo_unreference(brw->vs.base.push_const_bo);
1227    brw_bo_unreference(brw->tcs.base.push_const_bo);
1228    brw_bo_unreference(brw->tes.base.push_const_bo);
1229    brw_bo_unreference(brw->gs.base.push_const_bo);
1230    brw_bo_unreference(brw->wm.base.push_const_bo);
1231
1232    brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1233
1234    if (ctx->swrast_context) {
1235       _swsetup_DestroyContext(&brw->ctx);
1236       _tnl_DestroyContext(&brw->ctx);
1237    }
1238    _vbo_DestroyContext(&brw->ctx);
1239
1240    if (ctx->swrast_context)
1241       _swrast_DestroyContext(&brw->ctx);
1242
1243    brw_fini_pipe_control(brw);
1244    intel_batchbuffer_free(&brw->batch);
1245
1246    brw_bo_unreference(brw->throttle_batch[1]);
1247    brw_bo_unreference(brw->throttle_batch[0]);
1248    brw->throttle_batch[1] = NULL;
1249    brw->throttle_batch[0] = NULL;
1250
1251    driDestroyOptionCache(&brw->optionCache);
1252
1253    /* free the Mesa context */
1254    _mesa_free_context_data(&brw->ctx);
1255
1256    ralloc_free(brw);
1257    driContextPriv->driverPrivate = NULL;
1258 }
1259
1260 GLboolean
1261 intelUnbindContext(__DRIcontext * driContextPriv)
1262 {
1263    struct gl_context *ctx = driContextPriv->driverPrivate;
1264    _mesa_glthread_finish(ctx);
1265
1266    /* Unset current context and dispath table */
1267    _mesa_make_current(NULL, NULL, NULL);
1268
1269    return true;
1270 }
1271
1272 /**
1273  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
1274  * on window system framebuffers.
1275  *
1276  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1277  * your renderbuffer can do sRGB encode, and you can flip a switch that does
1278  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
1279  * for a visual where you're guaranteed to be capable, but it turns out that
1280  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1281  * incapable ones, because there's no difference between the two in resources
1282  * used.  Applications thus get built that accidentally rely on the default
1283  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
1284  * great...
1285  *
1286  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1287  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1288  * So they removed the enable knob and made it "if the renderbuffer is sRGB
1289  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
1290  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1291  * and get no sRGB encode (assuming that both kinds of visual are available).
1292  * Thus our choice to support sRGB by default on our visuals for desktop would
1293  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1294  *
1295  * Unfortunately, renderbuffer setup happens before a context is created.  So
1296  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1297  * context (without an sRGB visual), we go turn that back off before anyone
1298  * finds out.
1299  */
1300 static void
1301 intel_gles3_srgb_workaround(struct brw_context *brw,
1302                             struct gl_framebuffer *fb)
1303 {
1304    struct gl_context *ctx = &brw->ctx;
1305
1306    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1307       return;
1308
1309    for (int i = 0; i < BUFFER_COUNT; i++) {
1310       struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1311
1312       /* Check if sRGB was specifically asked for. */
1313       struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1314       if (irb && irb->need_srgb)
1315          return;
1316
1317       if (rb)
1318          rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1319    }
1320    /* Disable sRGB from framebuffers that are not compatible. */
1321    fb->Visual.sRGBCapable = false;
1322 }
1323
1324 GLboolean
1325 intelMakeCurrent(__DRIcontext * driContextPriv,
1326                  __DRIdrawable * driDrawPriv,
1327                  __DRIdrawable * driReadPriv)
1328 {
1329    struct brw_context *brw;
1330
1331    if (driContextPriv)
1332       brw = (struct brw_context *) driContextPriv->driverPrivate;
1333    else
1334       brw = NULL;
1335
1336    if (driContextPriv) {
1337       struct gl_context *ctx = &brw->ctx;
1338       struct gl_framebuffer *fb, *readFb;
1339
1340       if (driDrawPriv == NULL) {
1341          fb = _mesa_get_incomplete_framebuffer();
1342       } else {
1343          fb = driDrawPriv->driverPrivate;
1344          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1345       }
1346
1347       if (driReadPriv == NULL) {
1348          readFb = _mesa_get_incomplete_framebuffer();
1349       } else {
1350          readFb = driReadPriv->driverPrivate;
1351          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1352       }
1353
1354       /* The sRGB workaround changes the renderbuffer's format. We must change
1355        * the format before the renderbuffer's miptree get's allocated, otherwise
1356        * the formats of the renderbuffer and its miptree will differ.
1357        */
1358       intel_gles3_srgb_workaround(brw, fb);
1359       intel_gles3_srgb_workaround(brw, readFb);
1360
1361       /* If the context viewport hasn't been initialized, force a call out to
1362        * the loader to get buffers so we have a drawable size for the initial
1363        * viewport. */
1364       if (!brw->ctx.ViewportInitialized)
1365          intel_prepare_render(brw);
1366
1367       _mesa_make_current(ctx, fb, readFb);
1368    } else {
1369       GET_CURRENT_CONTEXT(ctx);
1370       _mesa_glthread_finish(ctx);
1371       _mesa_make_current(NULL, NULL, NULL);
1372    }
1373
1374    return true;
1375 }
1376
1377 void
1378 intel_resolve_for_dri2_flush(struct brw_context *brw,
1379                              __DRIdrawable *drawable)
1380 {
1381    const struct gen_device_info *devinfo = &brw->screen->devinfo;
1382
1383    if (devinfo->gen < 6) {
1384       /* MSAA and fast color clear are not supported, so don't waste time
1385        * checking whether a resolve is needed.
1386        */
1387       return;
1388    }
1389
1390    struct gl_framebuffer *fb = drawable->driverPrivate;
1391    struct intel_renderbuffer *rb;
1392
1393    /* Usually, only the back buffer will need to be downsampled. However,
1394     * the front buffer will also need it if the user has rendered into it.
1395     */
1396    static const gl_buffer_index buffers[2] = {
1397          BUFFER_BACK_LEFT,
1398          BUFFER_FRONT_LEFT,
1399    };
1400
1401    for (int i = 0; i < 2; ++i) {
1402       rb = intel_get_renderbuffer(fb, buffers[i]);
1403       if (rb == NULL || rb->mt == NULL)
1404          continue;
1405       if (rb->mt->surf.samples == 1) {
1406          assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1407                 rb->layer_count == 1);
1408          intel_miptree_prepare_external(brw, rb->mt);
1409       } else {
1410          intel_renderbuffer_downsample(brw, rb);
1411
1412          /* Call prepare_external on the single-sample miptree to do any
1413           * needed resolves prior to handing it off to the window system.
1414           * This is needed in the case that rb->singlesample_mt is Y-tiled
1415           * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E.  In
1416           * this case, the MSAA resolve above will write compressed data into
1417           * rb->singlesample_mt.
1418           *
1419           * TODO: Some day, if we decide to care about the tiny performance
1420           * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1421           * we could detect this case and just allocate the single-sampled
1422           * miptree without aux.  However, that would be a lot of plumbing and
1423           * this is a rather exotic case so it's not really worth it.
1424           */
1425          intel_miptree_prepare_external(brw, rb->singlesample_mt);
1426       }
1427    }
1428 }
1429
/** Return the size of one pixel of the renderbuffer's format, in bits. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   const unsigned bytes_per_pixel = _mesa_get_format_bytes(intel_rb_format(rb));

   return bytes_per_pixel * 8;
}
1435
/* Forward declarations of static helpers that are used by the
 * buffer-update functions below.
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1451
1452 static void
1453 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1454 {
1455    struct gl_framebuffer *fb = drawable->driverPrivate;
1456    struct intel_renderbuffer *rb;
1457    __DRIbuffer *buffers = NULL;
1458    int count;
1459    const char *region_name;
1460
1461    /* Set this up front, so that in case our buffers get invalidated
1462     * while we're getting new buffers, we don't clobber the stamp and
1463     * thus ignore the invalidate. */
1464    drawable->lastStamp = drawable->dri2.stamp;
1465
1466    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1467       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1468
1469    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1470
1471    if (buffers == NULL)
1472       return;
1473
1474    for (int i = 0; i < count; i++) {
1475        switch (buffers[i].attachment) {
1476        case __DRI_BUFFER_FRONT_LEFT:
1477            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1478            region_name = "dri2 front buffer";
1479            break;
1480
1481        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1482            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1483            region_name = "dri2 fake front buffer";
1484            break;
1485
1486        case __DRI_BUFFER_BACK_LEFT:
1487            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1488            region_name = "dri2 back buffer";
1489            break;
1490
1491        case __DRI_BUFFER_DEPTH:
1492        case __DRI_BUFFER_HIZ:
1493        case __DRI_BUFFER_DEPTH_STENCIL:
1494        case __DRI_BUFFER_STENCIL:
1495        case __DRI_BUFFER_ACCUM:
1496        default:
1497            fprintf(stderr,
1498                    "unhandled buffer attach event, attachment type %d\n",
1499                    buffers[i].attachment);
1500            return;
1501        }
1502
1503        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1504    }
1505
1506 }
1507
1508 void
1509 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1510 {
1511    struct brw_context *brw = context->driverPrivate;
1512    __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1513
1514    /* Set this up front, so that in case our buffers get invalidated
1515     * while we're getting new buffers, we don't clobber the stamp and
1516     * thus ignore the invalidate. */
1517    drawable->lastStamp = drawable->dri2.stamp;
1518
1519    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1520       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1521
1522    if (dri_screen->image.loader)
1523       intel_update_image_buffers(brw, drawable);
1524    else
1525       intel_update_dri2_buffers(brw, drawable);
1526
1527    driUpdateFramebufferSize(&brw->ctx, drawable);
1528 }
1529
1530 /**
1531  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1532  * state is required.
1533  */
1534 void
1535 intel_prepare_render(struct brw_context *brw)
1536 {
1537    struct gl_context *ctx = &brw->ctx;
1538    __DRIcontext *driContext = brw->driContext;
1539    __DRIdrawable *drawable;
1540
1541    drawable = driContext->driDrawablePriv;
1542    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1543       if (drawable->lastStamp != drawable->dri2.stamp)
1544          intel_update_renderbuffers(driContext, drawable);
1545       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1546    }
1547
1548    drawable = driContext->driReadablePriv;
1549    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1550       if (drawable->lastStamp != drawable->dri2.stamp)
1551          intel_update_renderbuffers(driContext, drawable);
1552       driContext->dri2.read_stamp = drawable->dri2.stamp;
1553    }
1554
1555    /* If we're currently rendering to the front buffer, the rendering
1556     * that will happen next will probably dirty the front buffer.  So
1557     * mark it as dirty here.
1558     */
1559    if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer) &&
1560        ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) {
1561       brw->front_buffer_dirty = true;
1562    }
1563
1564    if (brw->is_shared_buffer_bound) {
1565       /* Subsequent rendering will probably dirty the shared buffer. */
1566       brw->is_shared_buffer_dirty = true;
1567    }
1568 }
1569
1570 /**
1571  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1572  *
1573  * To determine which DRI buffers to request, examine the renderbuffers
1574  * attached to the drawable's framebuffer. Then request the buffers with
1575  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1576  *
1577  * This is called from intel_update_renderbuffers().
1578  *
1579  * \param drawable      Drawable whose buffers are queried.
1580  * \param buffers       [out] List of buffers returned by DRI2 query.
1581  * \param buffer_count  [out] Number of buffers returned.
1582  *
1583  * \see intel_update_renderbuffers()
1584  * \see DRI2GetBuffers()
1585  * \see DRI2GetBuffersWithFormat()
1586  */
1587 static void
1588 intel_query_dri2_buffers(struct brw_context *brw,
1589                          __DRIdrawable *drawable,
1590                          __DRIbuffer **buffers,
1591                          int *buffer_count)
1592 {
1593    __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1594    struct gl_framebuffer *fb = drawable->driverPrivate;
1595    int i = 0;
1596    unsigned attachments[8];
1597
1598    struct intel_renderbuffer *front_rb;
1599    struct intel_renderbuffer *back_rb;
1600
1601    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1602    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1603
1604    memset(attachments, 0, sizeof(attachments));
1605    if ((_mesa_is_front_buffer_drawing(fb) ||
1606         _mesa_is_front_buffer_reading(fb) ||
1607         !back_rb) && front_rb) {
1608       /* If a fake front buffer is in use, then querying for
1609        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1610        * the real front buffer to the fake front buffer.  So before doing the
1611        * query, we need to make sure all the pending drawing has landed in the
1612        * real front buffer.
1613        */
1614       intel_batchbuffer_flush(brw);
1615       intel_flush_front(&brw->ctx);
1616
1617       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1618       attachments[i++] = intel_bits_per_pixel(front_rb);
1619    } else if (front_rb && brw->front_buffer_dirty) {
1620       /* We have pending front buffer rendering, but we aren't querying for a
1621        * front buffer.  If the front buffer we have is a fake front buffer,
1622        * the X server is going to throw it away when it processes the query.
1623        * So before doing the query, make sure all the pending drawing has
1624        * landed in the real front buffer.
1625        */
1626       intel_batchbuffer_flush(brw);
1627       intel_flush_front(&brw->ctx);
1628    }
1629
1630    if (back_rb) {
1631       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1632       attachments[i++] = intel_bits_per_pixel(back_rb);
1633    }
1634
1635    assert(i <= ARRAY_SIZE(attachments));
1636
1637    *buffers =
1638       dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1639                                                     &drawable->w,
1640                                                     &drawable->h,
1641                                                     attachments, i / 2,
1642                                                     buffer_count,
1643                                                     drawable->loaderPrivate);
1644 }
1645
1646 /**
1647  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1648  *
1649  * This is called from intel_update_renderbuffers().
1650  *
1651  * \par Note:
1652  *    DRI buffers whose attachment point is DRI2BufferStencil or
1653  *    DRI2BufferDepthStencil are handled as special cases.
1654  *
1655  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1656  *        that is passed to brw_bo_gem_create_from_name().
1657  *
1658  * \see intel_update_renderbuffers()
1659  */
1660 static void
1661 intel_process_dri2_buffer(struct brw_context *brw,
1662                           __DRIdrawable *drawable,
1663                           __DRIbuffer *buffer,
1664                           struct intel_renderbuffer *rb,
1665                           const char *buffer_name)
1666 {
1667    struct gl_framebuffer *fb = drawable->driverPrivate;
1668    struct brw_bo *bo;
1669
1670    if (!rb)
1671       return;
1672
1673    unsigned num_samples = rb->Base.Base.NumSamples;
1674
1675    /* We try to avoid closing and reopening the same BO name, because the first
1676     * use of a mapping of the buffer involves a bunch of page faulting which is
1677     * moderately expensive.
1678     */
1679    struct intel_mipmap_tree *last_mt;
1680    if (num_samples == 0)
1681       last_mt = rb->mt;
1682    else
1683       last_mt = rb->singlesample_mt;
1684
1685    uint32_t old_name = 0;
1686    if (last_mt) {
1687        /* The bo already has a name because the miptree was created by a
1688         * previous call to intel_process_dri2_buffer(). If a bo already has a
1689         * name, then brw_bo_flink() is a low-cost getter.  It does not
1690         * create a new name.
1691         */
1692       brw_bo_flink(last_mt->bo, &old_name);
1693    }
1694
1695    if (old_name == buffer->name)
1696       return;
1697
1698    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1699       fprintf(stderr,
1700               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1701               buffer->name, buffer->attachment,
1702               buffer->cpp, buffer->pitch);
1703    }
1704
1705    bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1706                                           buffer->name);
1707    if (!bo) {
1708       fprintf(stderr,
1709               "Failed to open BO for returned DRI2 buffer "
1710               "(%dx%d, %s, named %d).\n"
1711               "This is likely a bug in the X Server that will lead to a "
1712               "crash soon.\n",
1713               drawable->w, drawable->h, buffer_name, buffer->name);
1714       return;
1715    }
1716
1717    uint32_t tiling, swizzle;
1718    brw_bo_get_tiling(bo, &tiling, &swizzle);
1719
1720    struct intel_mipmap_tree *mt =
1721       intel_miptree_create_for_bo(brw,
1722                                   bo,
1723                                   intel_rb_format(rb),
1724                                   0,
1725                                   drawable->w,
1726                                   drawable->h,
1727                                   1,
1728                                   buffer->pitch,
1729                                   isl_tiling_from_i915_tiling(tiling),
1730                                   MIPTREE_CREATE_DEFAULT);
1731    if (!mt) {
1732       brw_bo_unreference(bo);
1733       return;
1734    }
1735
1736    /* We got this BO from X11.  We cana't assume that we have coherent texture
1737     * access because X may suddenly decide to use it for scan-out which would
1738     * destroy coherency.
1739     */
1740    bo->cache_coherent = false;
1741
1742    if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1743                                                  drawable->w, drawable->h,
1744                                                  buffer->pitch)) {
1745       brw_bo_unreference(bo);
1746       intel_miptree_release(&mt);
1747       return;
1748    }
1749
1750    if (_mesa_is_front_buffer_drawing(fb) &&
1751        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1752         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1753        rb->Base.Base.NumSamples > 1) {
1754       intel_renderbuffer_upsample(brw, rb);
1755    }
1756
1757    assert(rb->mt);
1758
1759    brw_bo_unreference(bo);
1760 }
1761
1762 /**
1763  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1764  *
1765  * To determine which DRI buffers to request, examine the renderbuffers
1766  * attached to the drawable's framebuffer. Then request the buffers from
1767  * the image loader
1768  *
1769  * This is called from intel_update_renderbuffers().
1770  *
1771  * \param drawable      Drawable whose buffers are queried.
1772  * \param buffers       [out] List of buffers returned by DRI2 query.
1773  * \param buffer_count  [out] Number of buffers returned.
1774  *
1775  * \see intel_update_renderbuffers()
1776  */
1777
1778 static void
1779 intel_update_image_buffer(struct brw_context *intel,
1780                           __DRIdrawable *drawable,
1781                           struct intel_renderbuffer *rb,
1782                           __DRIimage *buffer,
1783                           enum __DRIimageBufferMask buffer_type)
1784 {
1785    struct gl_framebuffer *fb = drawable->driverPrivate;
1786
1787    if (!rb || !buffer->bo)
1788       return;
1789
1790    unsigned num_samples = rb->Base.Base.NumSamples;
1791
1792    /* Check and see if we're already bound to the right
1793     * buffer object
1794     */
1795    struct intel_mipmap_tree *last_mt;
1796    if (num_samples == 0)
1797       last_mt = rb->mt;
1798    else
1799       last_mt = rb->singlesample_mt;
1800
1801    if (last_mt && last_mt->bo == buffer->bo) {
1802       if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1803          intel_miptree_make_shareable(intel, last_mt);
1804       }
1805       return;
1806    }
1807
1808    /* Only allow internal compression if samples == 0.  For multisampled
1809     * window system buffers, the only thing the single-sampled buffer is used
1810     * for is as a resolve target.  If we do any compression beyond what is
1811     * supported by the window system, we will just have to resolve so it's
1812     * probably better to just not bother.
1813     */
1814    const bool allow_internal_aux = (num_samples == 0);
1815
1816    struct intel_mipmap_tree *mt =
1817       intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1818                                          intel_rb_format(rb),
1819                                          allow_internal_aux);
1820    if (!mt)
1821       return;
1822
1823    if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1824                                                  buffer->width, buffer->height,
1825                                                  buffer->pitch)) {
1826       intel_miptree_release(&mt);
1827       return;
1828    }
1829
1830    if (_mesa_is_front_buffer_drawing(fb) &&
1831        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1832        rb->Base.Base.NumSamples > 1) {
1833       intel_renderbuffer_upsample(intel, rb);
1834    }
1835
1836    if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1837       /* The compositor and the application may access this image
1838        * concurrently. The display hardware may even scanout the image while
1839        * the GPU is rendering to it.  Aux surfaces cause difficulty with
1840        * concurrent access, so permanently disable aux for this miptree.
1841        *
1842        * Perhaps we could improve overall application performance by
1843        * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1844        * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1845        * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1846        * approach to be highly dependent on the application's GL usage.
1847        *
1848        * I [chadv] expect clever disabling/reenabling to be counterproductive
1849        * in the use cases I care about: applications that render nearly
1850        * realtime handwriting to the surface while possibly undergiong
1851        * simultaneously scanout as a display plane. The app requires low
1852        * render latency. Even though the app spends most of its time in
1853        * shared-buffer mode, it also frequently transitions between
1854        * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1855        * mode.  Visual sutter during the transitions should be avoided.
1856        *
1857        * In this case, I [chadv] believe reducing the GPU workload at
1858        * shared-buffer/double-buffer transitions would offer a smoother app
1859        * experience than any savings due to aux compression. But I've
1860        * collected no data to prove my theory.
1861        */
1862       intel_miptree_make_shareable(intel, mt);
1863    }
1864 }
1865
1866 static void
1867 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1868 {
1869    struct gl_framebuffer *fb = drawable->driverPrivate;
1870    __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1871    struct intel_renderbuffer *front_rb;
1872    struct intel_renderbuffer *back_rb;
1873    struct __DRIimageList images;
1874    mesa_format format;
1875    uint32_t buffer_mask = 0;
1876    int ret;
1877
1878    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1879    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1880
1881    if (back_rb)
1882       format = intel_rb_format(back_rb);
1883    else if (front_rb)
1884       format = intel_rb_format(front_rb);
1885    else
1886       return;
1887
1888    if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1889                     _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1890       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1891    }
1892
1893    if (back_rb)
1894       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1895
1896    ret = dri_screen->image.loader->getBuffers(drawable,
1897                                               driGLFormatToImageFormat(format),
1898                                               &drawable->dri2.stamp,
1899                                               drawable->loaderPrivate,
1900                                               buffer_mask,
1901                                               &images);
1902    if (!ret)
1903       return;
1904
1905    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1906       drawable->w = images.front->width;
1907       drawable->h = images.front->height;
1908       intel_update_image_buffer(brw,
1909                                 drawable,
1910                                 front_rb,
1911                                 images.front,
1912                                 __DRI_IMAGE_BUFFER_FRONT);
1913    }
1914
1915    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1916       drawable->w = images.back->width;
1917       drawable->h = images.back->height;
1918       intel_update_image_buffer(brw,
1919                                 drawable,
1920                                 back_rb,
1921                                 images.back,
1922                                 __DRI_IMAGE_BUFFER_BACK);
1923    }
1924
1925    if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1926       assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
1927       drawable->w = images.back->width;
1928       drawable->h = images.back->height;
1929       intel_update_image_buffer(brw,
1930                                 drawable,
1931                                 back_rb,
1932                                 images.back,
1933                                 __DRI_IMAGE_BUFFER_SHARED);
1934       brw->is_shared_buffer_bound = true;
1935    } else {
1936       brw->is_shared_buffer_bound = false;
1937       brw->is_shared_buffer_dirty = false;
1938    }
1939 }