intel: Use a system value for gl_FragCoord
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "main/imports.h"
41 #include "main/macros.h"
42 #include "main/points.h"
43 #include "main/version.h"
44 #include "main/vtxfmt.h"
45 #include "main/texobj.h"
46 #include "main/framebuffer.h"
47 #include "main/stencil.h"
48 #include "main/state.h"
49 #include "main/spirv_extensions.h"
50
51 #include "vbo/vbo.h"
52
53 #include "drivers/common/driverfuncs.h"
54 #include "drivers/common/meta.h"
55 #include "utils.h"
56
57 #include "brw_context.h"
58 #include "brw_defines.h"
59 #include "brw_blorp.h"
60 #include "brw_draw.h"
61 #include "brw_state.h"
62
63 #include "intel_batchbuffer.h"
64 #include "intel_buffer_objects.h"
65 #include "intel_buffers.h"
66 #include "intel_fbo.h"
67 #include "intel_mipmap_tree.h"
68 #include "intel_pixel.h"
69 #include "intel_image.h"
70 #include "intel_tex.h"
71 #include "intel_tex_obj.h"
72
73 #include "swrast_setup/swrast_setup.h"
74 #include "tnl/tnl.h"
75 #include "tnl/t_pipeline.h"
76 #include "util/ralloc.h"
77 #include "util/debug.h"
78 #include "util/disk_cache.h"
79 #include "isl/isl.h"
80
81 #include "common/gen_defines.h"
82
83 #include "compiler/spirv/nir_spirv.h"
84 /***************************************
85 * Mesa's Driver Functions
86 ***************************************/
87
88 const char *const brw_vendor_string = "Intel Open Source Technology Center";
89
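/* Map the Braswell EU fuse count to the model suffix ("405" or "400") used
 * in the renderer string below; unknown configurations get a blank
 * placeholder.
 */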
90 static const char *
91 get_bsw_model(const struct intel_screen *screen)
92 {
93 switch (screen->eu_total) {
94 case 16:
95 return "405";
96 case 12:
97 return "400";
98 default:
99 return " ";
100 }
101 }
102
103 const char *
104 brw_get_renderer_string(const struct intel_screen *screen)
105 {
106 const char *chipset;
107 static char buffer[128];
108 char *bsw = NULL;
109
110 switch (screen->deviceID) {
111 #undef CHIPSET
112 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
113 #include "pci_ids/i965_pci_ids.h"
114 default:
115 chipset = "Unknown Intel Chipset";
116 break;
117 }
118
119 /* Braswell branding is funny, so we have to fix it up here */
120 if (screen->deviceID == 0x22B1) {
121 bsw = strdup(chipset);
122 char *needle = strstr(bsw, "XXX");
123 if (needle) {
124 memcpy(needle, get_bsw_model(screen), 3);
125 chipset = bsw;
126 }
127 }
128
129 (void) driGetRendererString(buffer, chipset, 0);
130 free(bsw);
131 return buffer;
132 }
133
134 static const GLubyte *
135 intel_get_string(struct gl_context * ctx, GLenum name)
136 {
137 const struct brw_context *const brw = brw_context(ctx);
138
139 switch (name) {
140 case GL_VENDOR:
141 return (GLubyte *) brw_vendor_string;
142
143 case GL_RENDERER:
144 return
145 (GLubyte *) brw_get_renderer_string(brw->screen);
146
147 default:
148 return NULL;
149 }
150 }
151
152 static void
153 brw_set_background_context(struct gl_context *ctx,
154 struct util_queue_monitoring *queue_info)
155 {
156 struct brw_context *brw = brw_context(ctx);
157 __DRIcontext *driContext = brw->driContext;
158 __DRIscreen *driScreen = driContext->driScreenPriv;
159 const __DRIbackgroundCallableExtension *backgroundCallable =
160 driScreen->dri2.backgroundCallable;
161
162 /* Note: Mesa will only call this function if we've called
163 * _mesa_enable_multithreading(). We only do that if the loader exposed
164 * the __DRI_BACKGROUND_CALLABLE extension. So we know that
165 * backgroundCallable is not NULL.
166 */
167 backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
168 }
169
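/* Viewport hook used when the loader cannot deliver invalidate events (see
 * brw_init_driver_functions below): a viewport change on a window-system
 * framebuffer is taken as a hint that the drawable may have been resized.
 */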
170 static void
171 intel_viewport(struct gl_context *ctx)
172 {
173 struct brw_context *brw = brw_context(ctx);
174 __DRIcontext *driContext = brw->driContext;
175
176 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
177 if (driContext->driDrawablePriv)
178 dri2InvalidateDrawable(driContext->driDrawablePriv);
179 if (driContext->driReadablePriv)
180 dri2InvalidateDrawable(driContext->driReadablePriv);
181 }
182 }
183
184 static void
185 intel_update_framebuffer(struct gl_context *ctx,
186 struct gl_framebuffer *fb)
187 {
188 struct brw_context *brw = brw_context(ctx);
189
190 /* Quantize the derived default number of samples
191 */
192 fb->DefaultGeometry._NumSamples =
193 intel_quantize_num_samples(brw->screen,
194 fb->DefaultGeometry.NumSamples);
195 }
196
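/* UpdateState hook: accumulate dirty GL state for later uploads and cache a
 * few derived values (stencil enables, front-face polygon bit, default
 * framebuffer sample counts).
 */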
197 static void
198 intel_update_state(struct gl_context * ctx)
199 {
200 GLuint new_state = ctx->NewState;
201 struct brw_context *brw = brw_context(ctx);
202
203 if (ctx->swrast_context)
204 _swrast_InvalidateState(ctx, new_state);
205
206 brw->NewGLState |= new_state;
207
208 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
209 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
210
211 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
212 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
213 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
214 brw->stencil_write_enabled =
215 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
216 }
217
218 if (new_state & _NEW_POLYGON)
219 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
220
221 if (new_state & _NEW_BUFFERS) {
222 intel_update_framebuffer(ctx, ctx->DrawBuffer);
223 if (ctx->DrawBuffer != ctx->ReadBuffer)
224 intel_update_framebuffer(ctx, ctx->ReadBuffer);
225 }
226 }
227
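/* Pick the loader's flushFrontBuffer hook: the image loader when present,
 * otherwise the classic DRI2 loader.
 */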
228 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
229
230 static void
231 intel_flush_front(struct gl_context *ctx)
232 {
233 struct brw_context *brw = brw_context(ctx);
234 __DRIcontext *driContext = brw->driContext;
235 __DRIdrawable *driDrawable = driContext->driDrawablePriv;
236 __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
237
238 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
239 if (flushFront(dri_screen) && driDrawable &&
240 driDrawable->loaderPrivate) {
241
242 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
243 *
244 * This potentially resolves both front and back buffer. It
245 * is unnecessary to resolve the back, but harms nothing except
246 * performance. And no one cares about front-buffer render
247 * performance.
248 */
249 intel_resolve_for_dri2_flush(brw, driDrawable);
250 intel_batchbuffer_flush(brw);
251
252 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
253
254 /* We set the dirty bit in intel_prepare_render() if we're
255 * front buffer rendering once we get there.
256 */
257 brw->front_buffer_dirty = false;
258 }
259 }
260 }
261
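/* Hand a dirty shared buffer (EGL mutable-render-buffer mode) back to the
 * loader for display, passing along a fence fd when the kernel supports
 * execution fences.
 */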
262 static void
263 brw_display_shared_buffer(struct brw_context *brw)
264 {
265 __DRIcontext *dri_context = brw->driContext;
266 __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
267 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
268 int fence_fd = -1;
269
270 if (!brw->is_shared_buffer_bound)
271 return;
272
273 if (!brw->is_shared_buffer_dirty)
274 return;
275
276 if (brw->screen->has_exec_fence) {
277 /* This function is always called during a flush operation, so there is
278 * no need to flush again here. But we want to provide a fence_fd to the
279 * loader, and a redundant flush is the easiest way to acquire one.
280 */
281 if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
282 return;
283 }
284
285 dri_screen->mutableRenderBuffer.loader
286 ->displaySharedBuffer(dri_drawable, fence_fd,
287 dri_drawable->loaderPrivate);
288 brw->is_shared_buffer_dirty = false;
289 }
290
291 static void
292 intel_glFlush(struct gl_context *ctx)
293 {
294 struct brw_context *brw = brw_context(ctx);
295
296 intel_batchbuffer_flush(brw);
297 intel_flush_front(ctx);
298 brw_display_shared_buffer(brw);
299 brw->need_flush_throttle = true;
300 }
301
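/* glFinish() hook: flush all pending work, then block until the most
 * recently submitted batch buffer has finished rendering.
 */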
302 static void
303 intel_finish(struct gl_context * ctx)
304 {
305 struct brw_context *brw = brw_context(ctx);
306
307 intel_glFlush(ctx);
308
309 if (brw->batch.last_bo)
310 brw_bo_wait_rendering(brw->batch.last_bo);
311 }
312
313 static void
314 brw_init_driver_functions(struct brw_context *brw,
315 struct dd_function_table *functions)
316 {
317 const struct gen_device_info *devinfo = &brw->screen->devinfo;
318
319 _mesa_init_driver_functions(functions);
320
321 /* GLX uses DRI2 invalidate events to handle window resizing.
322 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
323 * which doesn't provide a mechanism for snooping the event queues.
324 *
325 * So EGL still relies on viewport hacks to handle window resizing.
326 * This should go away with DRI3000.
327 */
328 if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
329 functions->Viewport = intel_viewport;
330
331 functions->Flush = intel_glFlush;
332 functions->Finish = intel_finish;
333 functions->GetString = intel_get_string;
334 functions->UpdateState = intel_update_state;
335
336 brw_init_draw_functions(functions);
337 intelInitTextureFuncs(functions);
338 intelInitTextureImageFuncs(functions);
339 intelInitTextureCopyImageFuncs(functions);
340 intelInitCopyImageFuncs(functions);
341 intelInitClearFuncs(functions);
342 intelInitBufferFuncs(functions);
343 intelInitPixelFuncs(functions);
344 intelInitBufferObjectFuncs(functions);
345 brw_init_syncobj_functions(functions);
346 brw_init_object_purgeable_functions(functions);
347
348 brwInitFragProgFuncs( functions );
349 brw_init_common_queryobj_functions(functions);
350 if (devinfo->gen >= 8 || devinfo->is_haswell)
351 hsw_init_queryobj_functions(functions);
352 else if (devinfo->gen >= 6)
353 gen6_init_queryobj_functions(functions);
354 else
355 gen4_init_queryobj_functions(functions);
356 brw_init_compute_functions(functions);
357 brw_init_conditional_render_functions(functions);
358
359 functions->GenerateMipmap = brw_generate_mipmap;
360
361 functions->QueryInternalFormat = brw_query_internal_format;
362
363 functions->NewTransformFeedback = brw_new_transform_feedback;
364 functions->DeleteTransformFeedback = brw_delete_transform_feedback;
365 if (can_do_mi_math_and_lrr(brw->screen)) {
366 functions->BeginTransformFeedback = hsw_begin_transform_feedback;
367 functions->EndTransformFeedback = hsw_end_transform_feedback;
368 functions->PauseTransformFeedback = hsw_pause_transform_feedback;
369 functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
370 } else if (devinfo->gen >= 7) {
371 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
372 functions->EndTransformFeedback = gen7_end_transform_feedback;
373 functions->PauseTransformFeedback = gen7_pause_transform_feedback;
374 functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
375 functions->GetTransformFeedbackVertexCount =
376 brw_get_transform_feedback_vertex_count;
377 } else {
378 functions->BeginTransformFeedback = brw_begin_transform_feedback;
379 functions->EndTransformFeedback = brw_end_transform_feedback;
380 functions->PauseTransformFeedback = brw_pause_transform_feedback;
381 functions->ResumeTransformFeedback = brw_resume_transform_feedback;
382 functions->GetTransformFeedbackVertexCount =
383 brw_get_transform_feedback_vertex_count;
384 }
385
386 if (devinfo->gen >= 6)
387 functions->GetSamplePosition = gen6_get_sample_position;
388
389 /* GL_ARB_get_program_binary */
390 brw_program_binary_init(brw->screen->deviceID);
391 functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
392 functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
393 functions->ProgramBinaryDeserializeDriverBlob =
394 brw_deserialize_program_binary;
395
396 if (brw->screen->disk_cache) {
397 functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
398 }
399
400 functions->SetBackgroundContext = brw_set_background_context;
401 }
402
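/* Advertise the SPIR-V capabilities for ARB_gl_spirv; several of them are
 * gated on the hardware generation.
 */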
403 static void
404 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
405 {
406 const struct gen_device_info *devinfo = &brw->screen->devinfo;
407 struct gl_context *ctx = &brw->ctx;
408
409 /* The following SPIR-V capabilities are only supported on gen7+. In theory
410 * you should enable the extension only on gen7+, but just in case let's
411 * assert it.
412 */
413 assert(devinfo->gen >= 7);
414
415 ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
416 ctx->Const.SpirVCapabilities.draw_parameters = true;
417 ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
418 ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
419 ctx->Const.SpirVCapabilities.image_write_without_format = true;
420 ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
421 ctx->Const.SpirVCapabilities.tessellation = true;
422 ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
423 ctx->Const.SpirVCapabilities.variable_pointers = true;
424 }
425
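/* Fill in the ctx->Const limits (texture sizes, varyings, MSAA modes,
 * UBO/SSBO alignments, viewport ranges, ...) from the device generation and
 * compiler capabilities.
 */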
426 static void
427 brw_initialize_context_constants(struct brw_context *brw)
428 {
429 const struct gen_device_info *devinfo = &brw->screen->devinfo;
430 struct gl_context *ctx = &brw->ctx;
431 const struct brw_compiler *compiler = brw->screen->compiler;
432
433 const bool stage_exists[MESA_SHADER_STAGES] = {
434 [MESA_SHADER_VERTEX] = true,
435 [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
436 [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
437 [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
438 [MESA_SHADER_FRAGMENT] = true,
439 [MESA_SHADER_COMPUTE] =
440 (_mesa_is_desktop_gl(ctx) &&
441 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
442 (ctx->API == API_OPENGLES2 &&
443 ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
444 };
445
446 unsigned num_stages = 0;
447 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
448 if (stage_exists[i])
449 num_stages++;
450 }
451
452 unsigned max_samplers =
453 devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;
454
455 ctx->Const.MaxDualSourceDrawBuffers = 1;
456 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
457 ctx->Const.MaxCombinedShaderOutputResources =
458 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
459
460 /* The timestamp register we can read for glGetTimestamp() is
461 * sometimes only 32 bits, before scaling to nanoseconds (depending
462 * on kernel).
463 *
464 * Once scaled to nanoseconds the timestamp would roll over at a
465 * non-power-of-two, so an application couldn't use
466 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
467 * report 36 bits and truncate at that (rolling over 5 times as
468 * often as the HW counter), and when the 32-bit counter rolls
469 * over, it happens to also be at a rollover in the reported value
470 * from near (1<<36) to 0.
471 *
472 * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
473 * rolls over every ~69 seconds.
474 */
475 ctx->Const.QueryCounterBits.Timestamp = 36;
476
477 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
478 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
479 if (devinfo->gen >= 7) {
480 ctx->Const.MaxRenderbufferSize = 16384;
481 ctx->Const.MaxTextureSize = 16384;
482 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
483 } else {
484 ctx->Const.MaxRenderbufferSize = 8192;
485 ctx->Const.MaxTextureSize = 8192;
486 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
487 }
488 ctx->Const.Max3DTextureLevels = 12; /* 2048 */
489 ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
490 ctx->Const.MaxTextureMbytes = 1536;
491 ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
492 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
493 ctx->Const.MaxTextureLodBias = 15.0;
494 ctx->Const.StripTextureBorder = true;
495 if (devinfo->gen >= 7) {
496 ctx->Const.MaxProgramTextureGatherComponents = 4;
497 ctx->Const.MinProgramTextureGatherOffset = -32;
498 ctx->Const.MaxProgramTextureGatherOffset = 31;
499 } else if (devinfo->gen == 6) {
500 ctx->Const.MaxProgramTextureGatherComponents = 1;
501 ctx->Const.MinProgramTextureGatherOffset = -8;
502 ctx->Const.MaxProgramTextureGatherOffset = 7;
503 }
504
505 ctx->Const.MaxUniformBlockSize = 65536;
506
507 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
508 struct gl_program_constants *prog = &ctx->Const.Program[i];
509
510 if (!stage_exists[i])
511 continue;
512
513 prog->MaxTextureImageUnits = max_samplers;
514
515 prog->MaxUniformBlocks = BRW_MAX_UBO;
516 prog->MaxCombinedUniformComponents =
517 prog->MaxUniformComponents +
518 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
519
520 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
521 prog->MaxAtomicBuffers = BRW_MAX_ABO;
522 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
523 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
524 }
525
526 ctx->Const.MaxTextureUnits =
527 MIN2(ctx->Const.MaxTextureCoordUnits,
528 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
529
530 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
531 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
532 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
533 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
534 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
535 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
536 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
537
538
539 /* Hardware only supports a limited number of transform feedback buffers.
540 * So we need to override the Mesa default (which is based only on software
541 * limits).
542 */
543 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
544
545 /* On Gen6, in the worst case, we use up one binding table entry per
546 * transform feedback component (see comments above the definition of
547 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
548 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
549 * BRW_MAX_SOL_BINDINGS.
550 *
551 * In "separate components" mode, we need to divide this value by
552 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
553 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
554 */
555 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
556 ctx->Const.MaxTransformFeedbackSeparateComponents =
557 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
558
559 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
560 !can_do_mi_math_and_lrr(brw->screen);
561
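/* Pick the advertised MSAA limit: the largest mode the screen supports, or,
 * when the clamp_max_samples driconf option is set, the largest supported
 * mode that does not exceed the clamp.
 */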
562 int max_samples;
563 const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
564 const int clamp_max_samples =
565 driQueryOptioni(&brw->optionCache, "clamp_max_samples");
566
567 if (clamp_max_samples < 0) {
568 max_samples = msaa_modes[0];
569 } else {
570 /* Select the largest supported MSAA mode that does not exceed
571 * clamp_max_samples.
572 */
573 max_samples = 0;
574 for (int i = 0; msaa_modes[i] != 0; ++i) {
575 if (msaa_modes[i] <= clamp_max_samples) {
576 max_samples = msaa_modes[i];
577 break;
578 }
579 }
580 }
581
582 ctx->Const.MaxSamples = max_samples;
583 ctx->Const.MaxColorTextureSamples = max_samples;
584 ctx->Const.MaxDepthTextureSamples = max_samples;
585 ctx->Const.MaxIntegerSamples = max_samples;
586 ctx->Const.MaxImageSamples = 0;
587
588 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
589 * to map indices of rectangular grid to sample numbers within a pixel.
590 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
591 * extension implementation. For more details see the comment above
592 * gen6_set_sample_maps() definition.
593 */
594 gen6_set_sample_maps(ctx);
595
596 ctx->Const.MinLineWidth = 1.0;
597 ctx->Const.MinLineWidthAA = 1.0;
598 if (devinfo->gen >= 6) {
599 ctx->Const.MaxLineWidth = 7.375;
600 ctx->Const.MaxLineWidthAA = 7.375;
601 ctx->Const.LineWidthGranularity = 0.125;
602 } else {
603 ctx->Const.MaxLineWidth = 7.0;
604 ctx->Const.MaxLineWidthAA = 7.0;
605 ctx->Const.LineWidthGranularity = 0.5;
606 }
607
608 /* For non-antialiased lines, we have to round the line width to the
609 * nearest whole number. Make sure that we don't advertise a line
610 * width that, when rounded, will be beyond the actual hardware
611 * maximum.
612 */
613 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
614
615 ctx->Const.MinPointSize = 1.0;
616 ctx->Const.MinPointSizeAA = 1.0;
617 ctx->Const.MaxPointSize = 255.0;
618 ctx->Const.MaxPointSizeAA = 255.0;
619 ctx->Const.PointSizeGranularity = 1.0;
620
621 if (devinfo->gen >= 5 || devinfo->is_g4x)
622 ctx->Const.MaxClipPlanes = 8;
623
624 ctx->Const.GLSLFragCoordIsSysVal = true;
625 ctx->Const.GLSLTessLevelsAsInputs = true;
626 ctx->Const.PrimitiveRestartForPatches = true;
627
628 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
629 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
630 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
631 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
632 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
633 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
634 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
635 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
636 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
637 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
638 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
639 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
640 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
641 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
642
643 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
644 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
645 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
646 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
647 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
648 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
649 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
650 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
651 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
652 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
653 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
654
655 /* Fragment shaders use real, 32-bit twos-complement integers for all
656 * integer types.
657 */
658 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
659 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
660 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
661 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
662 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
663
664 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
665 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
666 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
667 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
668 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
669
670 /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
671 * but we're not sure how it's actually done for vertex order, which
672 * affects the provoking vertex decision. Always use the last-vertex
673 * convention for quad primitives, which works as expected for now.
674 */
675 if (devinfo->gen >= 6)
676 ctx->Const.QuadsFollowProvokingVertexConvention = false;
677
678 ctx->Const.NativeIntegers = true;
679
680 /* Regarding the CMP instruction, the Ivybridge PRM says:
681 *
682 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
683 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
684 * 0xFFFFFFFF) is assigned to dst."
685 *
686 * but PRMs for earlier generations say
687 *
688 * "In dword format, one GRF may store up to 8 results. When the register
689 * is used later as a vector of Booleans, as only LSB at each channel
690 * contains meaning [sic] data, software should make sure all higher bits
691 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
692 *
693 * We select the representation of a true boolean uniform to be ~0, and fix
694 * the results of Gen <= 5 CMP instructions with -(result & 1).
695 */
696 ctx->Const.UniformBooleanTrue = ~0;
697
698 /* From the gen4 PRM, volume 4 page 127:
699 *
700 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
701 * the base address of the first element of the surface, computed in
702 * software by adding the surface base address to the byte offset of
703 * the element in the buffer."
704 *
705 * However, unaligned accesses are slower, so enforce buffer alignment.
706 *
707 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
708 * restriction: the start of the buffer needs to be 32B aligned.
709 */
710 ctx->Const.UniformBufferOffsetAlignment = 32;
711
712 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
713 * that we can safely have the CPU and GPU writing the same SSBO on
714 * non-cache-coherent systems (our Atom CPUs). With UBOs, the GPU never
715 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
716 * be updating disjoint regions of the buffer simultaneously and that will
717 * break if the regions overlap the same cacheline.
718 */
719 ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
720 ctx->Const.TextureBufferOffsetAlignment = 16;
721 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
722
723 if (devinfo->gen >= 6) {
724 ctx->Const.MaxVarying = 32;
725 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
726 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
727 compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
728 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
729 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
730 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
731 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
732 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
733 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
734 }
735
736 /* We want the GLSL compiler to emit code that uses condition codes */
737 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
738 ctx->Const.ShaderCompilerOptions[i] =
739 brw->screen->compiler->glsl_compiler_options[i];
740 }
741
742 if (devinfo->gen >= 7) {
743 ctx->Const.MaxViewportWidth = 32768;
744 ctx->Const.MaxViewportHeight = 32768;
745 }
746
747 /* ARB_viewport_array, OES_viewport_array */
748 if (devinfo->gen >= 6) {
749 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
750 ctx->Const.ViewportSubpixelBits = 8;
751
752 /* Cast to float before negating because MaxViewportWidth is unsigned.
753 */
754 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
755 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
756 }
757
758 /* ARB_gpu_shader5 */
759 if (devinfo->gen >= 7)
760 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
761
762 /* ARB_framebuffer_no_attachments */
763 ctx->Const.MaxFramebufferWidth = 16384;
764 ctx->Const.MaxFramebufferHeight = 16384;
765 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
766 ctx->Const.MaxFramebufferSamples = max_samples;
767
768 /* OES_primitive_bounding_box */
769 ctx->Const.NoPrimitiveBoundingBoxOutput = true;
770
771 /* TODO: We should be able to use STD430 packing by default on all hardware
772 * but some piglit tests [1] currently fail on SNB when this is enabled.
773 * The problem is that the message we use for uniform pulls in the
774 * vec4 back-end on SNB is the OWORD block load instruction, which
775 * takes its offset in units of OWORDs (16 bytes). On IVB+, we use the
776 * sampler which doesn't have these restrictions.
777 *
778 * In the scalar back-end, we use the sampler for dynamic uniform loads and
779 * pull an entire cache line at a time for constant offset loads both of
780 * which support almost any alignment.
781 *
782 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
783 */
784 if (devinfo->gen >= 7)
785 ctx->Const.UseSTD430AsDefaultPacking = true;
786
787 if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
788 ctx->Const.AllowMappedBuffersDuringExecution = true;
789
790 /* GL_ARB_get_program_binary */
791 ctx->Const.NumProgramBinaryFormats = 1;
792 }
793
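/* Derive the compute shader limits (work-group sizes, total invocations,
 * shared memory size) from the device's maximum CS thread count, assuming
 * SIMD32 dispatch.
 */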
794 static void
795 brw_initialize_cs_context_constants(struct brw_context *brw)
796 {
797 struct gl_context *ctx = &brw->ctx;
798 const struct intel_screen *screen = brw->screen;
799 struct gen_device_info *devinfo = &brw->screen->devinfo;
800
801 /* FINISHME: Do this for all platforms that the kernel supports */
802 if (devinfo->is_cherryview &&
803 screen->subslice_total > 0 && screen->eu_total > 0) {
804 /* Logical CS threads = EUs per subslice * 7 threads per EU */
805 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
806
807 /* Fuse configurations may give more threads than expected, never less. */
808 if (max_cs_threads > devinfo->max_cs_threads)
809 devinfo->max_cs_threads = max_cs_threads;
810 }
811
812 /* Maximum number of scalar compute shader invocations that can be run in
813 * parallel in the same subslice assuming SIMD32 dispatch.
814 *
815 * We don't advertise more than 64 threads, because we are limited to 64 by
816 * our usage of thread_width_max in the gpgpu walker command. This only
817 * currently impacts Haswell, which otherwise might be able to advertise 70
818 * threads. With SIMD32 and 64 threads, Haswell still provides twice the
819 * number of invocations required for ARB_compute_shader.
820 */
821 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
822 const uint32_t max_invocations = 32 * max_threads;
823 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
824 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
825 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
826 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
827 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
828 }
829
830 /**
831 * Process driconf (drirc) options, setting appropriate context flags.
832 *
833 * intelInitExtensions still pokes at optionCache directly, in order to
834 * avoid advertising various extensions. No flags are set, so it makes
835 * sense to continue doing that there.
836 */
837 static void
838 brw_process_driconf_options(struct brw_context *brw)
839 {
840 const struct gen_device_info *devinfo = &brw->screen->devinfo;
841 struct gl_context *ctx = &brw->ctx;
842
843 driOptionCache *options = &brw->optionCache;
844 driParseConfigFiles(options, &brw->screen->optionCache,
845 brw->driContext->driScreenPriv->myNum,
846 "i965", NULL);
847
848 int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
849 switch (bo_reuse_mode) {
850 case DRI_CONF_BO_REUSE_DISABLED:
851 break;
852 case DRI_CONF_BO_REUSE_ALL:
853 brw_bufmgr_enable_reuse(brw->bufmgr);
854 break;
855 }
856
857 if (INTEL_DEBUG & DEBUG_NO_HIZ) {
858 brw->has_hiz = false;
859 /* On gen6, you can only do separate stencil with HIZ. */
860 if (devinfo->gen == 6)
861 brw->has_separate_stencil = false;
862 }
863
864 if (driQueryOptionb(options, "mesa_no_error"))
865 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
866
867 if (driQueryOptionb(options, "always_flush_batch")) {
868 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
869 brw->always_flush_batch = true;
870 }
871
872 if (driQueryOptionb(options, "always_flush_cache")) {
873 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
874 brw->always_flush_cache = true;
875 }
876
877 if (driQueryOptionb(options, "disable_throttling")) {
878 fprintf(stderr, "disabling flush throttling\n");
879 brw->disable_throttling = true;
880 }
881
882 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
883
884 if (driQueryOptionb(&brw->optionCache, "precise_trig"))
885 brw->screen->compiler->precise_trig = true;
886
887 ctx->Const.ForceGLSLExtensionsWarn =
888 driQueryOptionb(options, "force_glsl_extensions_warn");
889
890 ctx->Const.ForceGLSLVersion =
891 driQueryOptioni(options, "force_glsl_version");
892
893 ctx->Const.DisableGLSLLineContinuations =
894 driQueryOptionb(options, "disable_glsl_line_continuations");
895
896 ctx->Const.AllowGLSLExtensionDirectiveMidShader =
897 driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
898
899 ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
900 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
901
902 ctx->Const.AllowHigherCompatVersion =
903 driQueryOptionb(options, "allow_higher_compat_version");
904
905 ctx->Const.ForceGLSLAbsSqrt =
906 driQueryOptionb(options, "force_glsl_abs_sqrt");
907
908 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
909
910 brw->dual_color_blend_by_location =
911 driQueryOptionb(options, "dual_color_blend_by_location");
912
913 ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
914 driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
915
916 ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
917 driComputeOptionsSha1(&brw->screen->optionCache,
918 ctx->Const.dri_config_options_sha1);
919 }
920
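/* DRI createContext entry point: validate the requested flags and
 * attributes, allocate the brw_context, and initialize the core Mesa
 * context, driver function table, state modules, and the kernel hardware
 * context.
 */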
921 GLboolean
922 brwCreateContext(gl_api api,
923 const struct gl_config *mesaVis,
924 __DRIcontext *driContextPriv,
925 const struct __DriverContextConfig *ctx_config,
926 unsigned *dri_ctx_error,
927 void *sharedContextPrivate)
928 {
929 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
930 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
931 const struct gen_device_info *devinfo = &screen->devinfo;
932 struct dd_function_table functions;
933
934 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
935 * provides us with context reset notifications.
936 */
937 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
938 __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
939 __DRI_CTX_FLAG_NO_ERROR;
940
941 if (screen->has_context_reset_notification)
942 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
943
944 if (ctx_config->flags & ~allowed_flags) {
945 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
946 return false;
947 }
948
949 if (ctx_config->attribute_mask &
950 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
951 __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
952 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
953 return false;
954 }
955
956 bool notify_reset =
957 ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
958 ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
959
960 struct brw_context *brw = rzalloc(NULL, struct brw_context);
961 if (!brw) {
962 fprintf(stderr, "%s: failed to alloc context\n", __func__);
963 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
964 return false;
965 }
966
967 driContextPriv->driverPrivate = brw;
968 brw->driContext = driContextPriv;
969 brw->screen = screen;
970 brw->bufmgr = screen->bufmgr;
971
972 brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
973 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
974
975 brw->has_swizzling = screen->hw_has_swizzling;
976
977 brw->isl_dev = screen->isl_dev;
978
979 brw->vs.base.stage = MESA_SHADER_VERTEX;
980 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
981 brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
982 brw->gs.base.stage = MESA_SHADER_GEOMETRY;
983 brw->wm.base.stage = MESA_SHADER_FRAGMENT;
984 brw->cs.base.stage = MESA_SHADER_COMPUTE;
985
986 brw_init_driver_functions(brw, &functions);
987
988 if (notify_reset)
989 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
990
991 struct gl_context *ctx = &brw->ctx;
992
993 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
994 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
995 fprintf(stderr, "%s: failed to init mesa context\n", __func__);
996 intelDestroyContext(driContextPriv);
997 return false;
998 }
999
1000 driContextSetFlags(ctx, ctx_config->flags);
1001
1002 /* Initialize the software rasterizer and helper modules.
1003 *
1004 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1005 * software fallbacks (which we have to support on legacy GL to do weird
1006 * glDrawPixels(), glBitmap(), and other functions).
1007 */
1008 if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1009 _swrast_CreateContext(ctx);
1010 }
1011
1012 _vbo_CreateContext(ctx);
1013 if (ctx->swrast_context) {
1014 _tnl_CreateContext(ctx);
1015 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1016 _swsetup_CreateContext(ctx);
1017
1018 /* Configure swrast to match hardware characteristics: */
1019 _swrast_allow_pixel_fog(ctx, false);
1020 _swrast_allow_vertex_fog(ctx, true);
1021 }
1022
1023 _mesa_meta_init(ctx);
1024
1025 brw_process_driconf_options(brw);
1026
1027 if (INTEL_DEBUG & DEBUG_PERF)
1028 brw->perf_debug = true;
1029
1030 brw_initialize_cs_context_constants(brw);
1031 brw_initialize_context_constants(brw);
1032
1033 ctx->Const.ResetStrategy = notify_reset
1034 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1035
1036 /* Reinitialize the context point state. It depends on ctx->Const values. */
1037 _mesa_init_point(ctx);
1038
1039 intel_fbo_init(brw);
1040
1041 intel_batchbuffer_init(brw);
1042
1043 /* Create a new hardware context. Using a hardware context means that
1044 * our GPU state will be saved/restored on context switch, allowing us
1045 * to assume that the GPU is in the same state we left it in.
1046 *
1047 * This is required for transform feedback buffer offsets, query objects,
1048 * and also allows us to reduce how much state we have to emit.
1049 */
1050 brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1051 if (!brw->hw_ctx && devinfo->gen >= 6) {
1052 fprintf(stderr, "Failed to create hardware context.\n");
1053 intelDestroyContext(driContextPriv);
1054 return false;
1055 }
1056
1057 if (brw->hw_ctx) {
1058 int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1059 if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1060 switch (ctx_config->priority) {
1061 case __DRI_CTX_PRIORITY_LOW:
1062 hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1063 break;
1064 case __DRI_CTX_PRIORITY_HIGH:
1065 hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1066 break;
1067 }
1068 }
1069 if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1070 brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1071 fprintf(stderr,
1072 "Failed to set priority [%d:%d] for hardware context.\n",
1073 ctx_config->priority, hw_priority);
1074 intelDestroyContext(driContextPriv);
1075 return false;
1076 }
1077 }
1078
1079 if (brw_init_pipe_control(brw, devinfo)) {
1080 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1081 intelDestroyContext(driContextPriv);
1082 return false;
1083 }
1084
1085 brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1086
1087 brw_init_state(brw);
1088
1089 intelInitExtensions(ctx);
1090
1091 brw_init_surface_formats(brw);
1092
1093 brw_blorp_init(brw);
1094
1095 brw->urb.size = devinfo->urb.size;
1096
1097 if (devinfo->gen == 6)
1098 brw->urb.gs_present = false;
1099
1100 brw->prim_restart.in_progress = false;
1101 brw->prim_restart.enable_cut_index = false;
1102 brw->gs.enabled = false;
1103 brw->clip.viewport_count = 1;
1104
1105 brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1106
1107 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1108
1109 ctx->VertexProgram._MaintainTnlProgram = true;
1110 ctx->FragmentProgram._MaintainTexEnvProgram = true;
1111
1112 brw_draw_init( brw );
1113
1114 if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1115 /* Turn on some extra GL_ARB_debug_output generation. */
1116 brw->perf_debug = true;
1117 }
1118
1119 if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1120 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1121 ctx->Const.RobustAccess = GL_TRUE;
1122 }
1123
1124 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1125 brw_init_shader_time(brw);
1126
1127 _mesa_override_extensions(ctx);
1128 _mesa_compute_version(ctx);
1129
1130 /* GL_ARB_gl_spirv */
1131 if (ctx->Extensions.ARB_gl_spirv) {
1132 brw_initialize_spirv_supported_capabilities(brw);
1133
1134 if (ctx->Extensions.ARB_spirv_extensions) {
1135 /* GL_ARB_spirv_extensions */
1136 ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
1137 _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
1138 &ctx->Const.SpirVCapabilities);
1139 }
1140 }
1141
1142 _mesa_initialize_dispatch_tables(ctx);
1143 _mesa_initialize_vbo_vtxfmt(ctx);
1144
1145 if (ctx->Extensions.INTEL_performance_query)
1146 brw_init_performance_queries(brw);
1147
1148 vbo_use_buffer_objects(ctx);
1149 vbo_always_unmap_buffers(ctx);
1150
1151 brw->ctx.Cache = brw->screen->disk_cache;
1152
1153 if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1154 driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1155 /* Loader supports multithreading, and so do we. */
1156 _mesa_glthread_init(ctx);
1157 }
1158
1159 return true;
1160 }
1161
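/* DRI destroyContext entry point: tear down driver state roughly in the
 * reverse order of creation and drop the per-stage scratch and
 * push-constant buffer objects.
 */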
1162 void
1163 intelDestroyContext(__DRIcontext * driContextPriv)
1164 {
1165 struct brw_context *brw =
1166 (struct brw_context *) driContextPriv->driverPrivate;
1167 struct gl_context *ctx = &brw->ctx;
1168
1169 GET_CURRENT_CONTEXT(curctx);
1170
1171 if (curctx == NULL) {
1172 /* No current context, but we need one to release
1173 * the renderbuffer surfaces when we release the framebuffer.
1174 * So temporarily bind the context.
1175 */
1176 _mesa_make_current(ctx, NULL, NULL);
1177 }
1178
1179 _mesa_glthread_destroy(&brw->ctx);
1180
1181 _mesa_meta_free(&brw->ctx);
1182
1183 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1184 /* Force a report. */
1185 brw->shader_time.report_time = 0;
1186
1187 brw_collect_and_report_shader_time(brw);
1188 brw_destroy_shader_time(brw);
1189 }
1190
1191 blorp_finish(&brw->blorp);
1192
1193 brw_destroy_state(brw);
1194 brw_draw_destroy(brw);
1195
1196 brw_bo_unreference(brw->curbe.curbe_bo);
1197
1198 brw_bo_unreference(brw->vs.base.scratch_bo);
1199 brw_bo_unreference(brw->tcs.base.scratch_bo);
1200 brw_bo_unreference(brw->tes.base.scratch_bo);
1201 brw_bo_unreference(brw->gs.base.scratch_bo);
1202 brw_bo_unreference(brw->wm.base.scratch_bo);
1203
1204 brw_bo_unreference(brw->vs.base.push_const_bo);
1205 brw_bo_unreference(brw->tcs.base.push_const_bo);
1206 brw_bo_unreference(brw->tes.base.push_const_bo);
1207 brw_bo_unreference(brw->gs.base.push_const_bo);
1208 brw_bo_unreference(brw->wm.base.push_const_bo);
1209
1210 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1211
1212 if (ctx->swrast_context) {
1213 _swsetup_DestroyContext(&brw->ctx);
1214 _tnl_DestroyContext(&brw->ctx);
1215 }
1216 _vbo_DestroyContext(&brw->ctx);
1217
1218 if (ctx->swrast_context)
1219 _swrast_DestroyContext(&brw->ctx);
1220
1221 brw_fini_pipe_control(brw);
1222 intel_batchbuffer_free(&brw->batch);
1223
1224 brw_bo_unreference(brw->throttle_batch[1]);
1225 brw_bo_unreference(brw->throttle_batch[0]);
1226 brw->throttle_batch[1] = NULL;
1227 brw->throttle_batch[0] = NULL;
1228
1229 driDestroyOptionCache(&brw->optionCache);
1230
1231 /* free the Mesa context */
1232 _mesa_free_context_data(&brw->ctx, true);
1233
1234 ralloc_free(brw);
1235 driContextPriv->driverPrivate = NULL;
1236 }
1237
1238 GLboolean
1239 intelUnbindContext(__DRIcontext * driContextPriv)
1240 {
1241 GET_CURRENT_CONTEXT(ctx);
1242 _mesa_glthread_finish(ctx);
1243
1244 /* Unset current context and dispatch table */
1245 _mesa_make_current(NULL, NULL, NULL);
1246
1247 return true;
1248 }
1249
1250 /**
1251 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1252 * on window system framebuffers.
1253 *
1254 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1255 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1256 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1257 * for a visual where you're guaranteed to be capable, but it turns out that
1258 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1259 * incapable ones, because there's no difference between the two in resources
1260 * used. Applications thus get built that accidentally rely on the default
1261 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1262 * great...
1263 *
1264 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1265 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1266 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1267 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1268 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1269 * and get no sRGB encode (assuming that both kinds of visual are available).
1270 * Thus our choice to support sRGB by default on our visuals for desktop would
1271 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1272 *
1273 * Unfortunately, renderbuffer setup happens before a context is created. So
1274 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1275 * context (without an sRGB visual), we go turn that back off before anyone
1276 * finds out.
1277 */
1278 static void
1279 intel_gles3_srgb_workaround(struct brw_context *brw,
1280 struct gl_framebuffer *fb)
1281 {
1282 struct gl_context *ctx = &brw->ctx;
1283
1284 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1285 return;
1286
1287 for (int i = 0; i < BUFFER_COUNT; i++) {
1288 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1289
1290 /* Check if sRGB was specifically asked for. */
1291 struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1292 if (irb && irb->need_srgb)
1293 return;
1294
1295 if (rb)
1296 rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1297 }
1298 /* Disable sRGB from framebuffers that are not compatible. */
1299 fb->Visual.sRGBCapable = false;
1300 }
1301
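/* DRI makeCurrent entry point: bind the context with its draw and read
 * framebuffers, or unbind everything when driContextPriv is NULL.
 */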
1302 GLboolean
1303 intelMakeCurrent(__DRIcontext * driContextPriv,
1304 __DRIdrawable * driDrawPriv,
1305 __DRIdrawable * driReadPriv)
1306 {
1307 struct brw_context *brw;
1308
1309 if (driContextPriv)
1310 brw = (struct brw_context *) driContextPriv->driverPrivate;
1311 else
1312 brw = NULL;
1313
1314 if (driContextPriv) {
1315 struct gl_context *ctx = &brw->ctx;
1316 struct gl_framebuffer *fb, *readFb;
1317
1318 if (driDrawPriv == NULL) {
1319 fb = _mesa_get_incomplete_framebuffer();
1320 } else {
1321 fb = driDrawPriv->driverPrivate;
1322 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1323 }
1324
1325 if (driReadPriv == NULL) {
1326 readFb = _mesa_get_incomplete_framebuffer();
1327 } else {
1328 readFb = driReadPriv->driverPrivate;
1329 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1330 }
1331
1332 /* The sRGB workaround changes the renderbuffer's format. We must change
1333 * the format before the renderbuffer's miptree gets allocated, otherwise
1334 * the formats of the renderbuffer and its miptree will differ.
1335 */
1336 intel_gles3_srgb_workaround(brw, fb);
1337 intel_gles3_srgb_workaround(brw, readFb);
1338
1339 /* If the context viewport hasn't been initialized, force a call out to
1340 * the loader to get buffers so we have a drawable size for the initial
1341 * viewport. */
1342 if (!brw->ctx.ViewportInitialized)
1343 intel_prepare_render(brw);
1344
1345 _mesa_make_current(ctx, fb, readFb);
1346 } else {
1347 GET_CURRENT_CONTEXT(ctx);
1348 _mesa_glthread_finish(ctx);
1349 _mesa_make_current(NULL, NULL, NULL);
1350 }
1351
1352 return true;
1353 }
1354
1355 void
1356 intel_resolve_for_dri2_flush(struct brw_context *brw,
1357 __DRIdrawable *drawable)
1358 {
1359 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1360
1361 if (devinfo->gen < 6) {
1362 /* MSAA and fast color clear are not supported, so don't waste time
1363 * checking whether a resolve is needed.
1364 */
1365 return;
1366 }
1367
1368 struct gl_framebuffer *fb = drawable->driverPrivate;
1369 struct intel_renderbuffer *rb;
1370
1371 /* Usually, only the back buffer will need to be downsampled. However,
1372 * the front buffer will also need it if the user has rendered into it.
1373 */
1374 static const gl_buffer_index buffers[2] = {
1375 BUFFER_BACK_LEFT,
1376 BUFFER_FRONT_LEFT,
1377 };
1378
1379 for (int i = 0; i < 2; ++i) {
1380 rb = intel_get_renderbuffer(fb, buffers[i]);
1381 if (rb == NULL || rb->mt == NULL)
1382 continue;
1383 if (rb->mt->surf.samples == 1) {
1384 assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1385 rb->layer_count == 1);
1386 intel_miptree_prepare_external(brw, rb->mt);
1387 } else {
1388 intel_renderbuffer_downsample(brw, rb);
1389
1390 /* Call prepare_external on the single-sample miptree to do any
1391 * needed resolves prior to handing it off to the window system.
1392 * This is needed in the case that rb->singlesample_mt is Y-tiled
1393 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
1394 * this case, the MSAA resolve above will write compressed data into
1395 * rb->singlesample_mt.
1396 *
1397 * TODO: Some day, if we decide to care about the tiny performance
1398 * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1399 * we could detect this case and just allocate the single-sampled
1400 * miptree without aux. However, that would be a lot of plumbing and
1401 * this is a rather exotic case so it's not really worth it.
1402 */
1403 intel_miptree_prepare_external(brw, rb->singlesample_mt);
1404 }
1405 }
1406 }
1407
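/* Bits per pixel of the renderbuffer's format, as expected in the DRI2
 * attachment list built below.
 */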
1408 static unsigned
1409 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1410 {
1411 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1412 }
1413
1414 static void
1415 intel_query_dri2_buffers(struct brw_context *brw,
1416 __DRIdrawable *drawable,
1417 __DRIbuffer **buffers,
1418 int *count);
1419
1420 static void
1421 intel_process_dri2_buffer(struct brw_context *brw,
1422 __DRIdrawable *drawable,
1423 __DRIbuffer *buffer,
1424 struct intel_renderbuffer *rb,
1425 const char *buffer_name);
1426
1427 static void
1428 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1429
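/* Refresh the framebuffer's renderbuffers through the classic DRI2 loader:
 * query the current attachments and wrap each returned __DRIbuffer into the
 * matching renderbuffer.
 */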
1430 static void
1431 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1432 {
1433 struct gl_framebuffer *fb = drawable->driverPrivate;
1434 struct intel_renderbuffer *rb;
1435 __DRIbuffer *buffers = NULL;
1436 int count;
1437 const char *region_name;
1438
1439 /* Set this up front, so that in case our buffers get invalidated
1440 * while we're getting new buffers, we don't clobber the stamp and
1441 * thus ignore the invalidate. */
1442 drawable->lastStamp = drawable->dri2.stamp;
1443
1444 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1445 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1446
1447 intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1448
1449 if (buffers == NULL)
1450 return;
1451
1452 for (int i = 0; i < count; i++) {
1453 switch (buffers[i].attachment) {
1454 case __DRI_BUFFER_FRONT_LEFT:
1455 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1456 region_name = "dri2 front buffer";
1457 break;
1458
1459 case __DRI_BUFFER_FAKE_FRONT_LEFT:
1460 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1461 region_name = "dri2 fake front buffer";
1462 break;
1463
1464 case __DRI_BUFFER_BACK_LEFT:
1465 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1466 region_name = "dri2 back buffer";
1467 break;
1468
1469 case __DRI_BUFFER_DEPTH:
1470 case __DRI_BUFFER_HIZ:
1471 case __DRI_BUFFER_DEPTH_STENCIL:
1472 case __DRI_BUFFER_STENCIL:
1473 case __DRI_BUFFER_ACCUM:
1474 default:
1475 fprintf(stderr,
1476 "unhandled buffer attach event, attachment type %d\n",
1477 buffers[i].attachment);
1478 return;
1479 }
1480
1481 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1482 }
1483
1484 }
1485
1486 void
1487 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1488 {
1489 struct brw_context *brw = context->driverPrivate;
1490 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1491
1492 /* Set this up front, so that in case our buffers get invalidated
1493 * while we're getting new buffers, we don't clobber the stamp and
1494 * thus ignore the invalidate. */
1495 drawable->lastStamp = drawable->dri2.stamp;
1496
1497 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1498 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1499
1500 if (dri_screen->image.loader)
1501 intel_update_image_buffers(brw, drawable);
1502 else
1503 intel_update_dri2_buffers(brw, drawable);
1504
1505 driUpdateFramebufferSize(&brw->ctx, drawable);
1506 }
1507
1508 /**
1509 * intel_prepare_render should be called anywhere that current read/drawbuffer
1510 * state is required.
1511 */
1512 void
1513 intel_prepare_render(struct brw_context *brw)
1514 {
1515 struct gl_context *ctx = &brw->ctx;
1516 __DRIcontext *driContext = brw->driContext;
1517 __DRIdrawable *drawable;
1518
1519 drawable = driContext->driDrawablePriv;
1520 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1521 if (drawable->lastStamp != drawable->dri2.stamp)
1522 intel_update_renderbuffers(driContext, drawable);
1523 driContext->dri2.draw_stamp = drawable->dri2.stamp;
1524 }
1525
1526 drawable = driContext->driReadablePriv;
1527 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1528 if (drawable->lastStamp != drawable->dri2.stamp)
1529 intel_update_renderbuffers(driContext, drawable);
1530 driContext->dri2.read_stamp = drawable->dri2.stamp;
1531 }
1532
1533 /* If we're currently rendering to the front buffer, the rendering
1534 * that will happen next will probably dirty the front buffer. So
1535 * mark it as dirty here.
1536 */
1537 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1538 brw->front_buffer_dirty = true;
1539
1540 if (brw->is_shared_buffer_bound) {
1541 /* Subsequent rendering will probably dirty the shared buffer. */
1542 brw->is_shared_buffer_dirty = true;
1543 }
1544 }
1545
1546 /**
1547 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1548 *
1549 * To determine which DRI buffers to request, examine the renderbuffers
1550 * attached to the drawable's framebuffer. Then request the buffers with
1551 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1552 *
1553 * This is called from intel_update_renderbuffers().
1554 *
1555 * \param drawable Drawable whose buffers are queried.
1556 * \param buffers [out] List of buffers returned by DRI2 query.
1557 * \param buffer_count [out] Number of buffers returned.
1558 *
1559 * \see intel_update_renderbuffers()
1560 * \see DRI2GetBuffers()
1561 * \see DRI2GetBuffersWithFormat()
1562 */
1563 static void
1564 intel_query_dri2_buffers(struct brw_context *brw,
1565 __DRIdrawable *drawable,
1566 __DRIbuffer **buffers,
1567 int *buffer_count)
1568 {
1569 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1570 struct gl_framebuffer *fb = drawable->driverPrivate;
1571 int i = 0;
1572 unsigned attachments[8];
1573
1574 struct intel_renderbuffer *front_rb;
1575 struct intel_renderbuffer *back_rb;
1576
1577 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1578 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1579
1580 memset(attachments, 0, sizeof(attachments));
1581 if ((_mesa_is_front_buffer_drawing(fb) ||
1582 _mesa_is_front_buffer_reading(fb) ||
1583 !back_rb) && front_rb) {
1584 /* If a fake front buffer is in use, then querying for
1585 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1586 * the real front buffer to the fake front buffer. So before doing the
1587 * query, we need to make sure all the pending drawing has landed in the
1588 * real front buffer.
1589 */
1590 intel_batchbuffer_flush(brw);
1591 intel_flush_front(&brw->ctx);
1592
1593 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1594 attachments[i++] = intel_bits_per_pixel(front_rb);
1595 } else if (front_rb && brw->front_buffer_dirty) {
1596 /* We have pending front buffer rendering, but we aren't querying for a
1597 * front buffer. If the front buffer we have is a fake front buffer,
1598 * the X server is going to throw it away when it processes the query.
1599 * So before doing the query, make sure all the pending drawing has
1600 * landed in the real front buffer.
1601 */
1602 intel_batchbuffer_flush(brw);
1603 intel_flush_front(&brw->ctx);
1604 }
1605
1606 if (back_rb) {
1607 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1608 attachments[i++] = intel_bits_per_pixel(back_rb);
1609 }
1610
1611 assert(i <= ARRAY_SIZE(attachments));
1612
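   /* For illustration only (bits-per-pixel values assumed, not taken from a
    * real trace): with both a front and a back renderbuffer at 32 bpp, the
    * query below passes the loader
    *
    *    attachments[] = { __DRI_BUFFER_FRONT_LEFT, 32,
    *                      __DRI_BUFFER_BACK_LEFT,  32 };
    *
    * i.e. i == 4 array entries describing i / 2 == 2 attachment/format pairs.
    */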
1613 *buffers =
1614 dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1615 &drawable->w,
1616 &drawable->h,
1617 attachments, i / 2,
1618 buffer_count,
1619 drawable->loaderPrivate);
1620 }
1621
1622 /**
1623 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1624 *
1625 * This is called from intel_update_renderbuffers().
1626 *
1627 * \par Note:
1628 * DRI buffers whose attachment point is DRI2BufferStencil or
1629 * DRI2BufferDepthStencil are handled as special cases.
1630 *
1631 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1632 * that is passed to brw_bo_gem_create_from_name().
1633 *
1634 * \see intel_update_renderbuffers()
1635 */
1636 static void
1637 intel_process_dri2_buffer(struct brw_context *brw,
1638 __DRIdrawable *drawable,
1639 __DRIbuffer *buffer,
1640 struct intel_renderbuffer *rb,
1641 const char *buffer_name)
1642 {
1643 struct gl_framebuffer *fb = drawable->driverPrivate;
1644 struct brw_bo *bo;
1645
1646 if (!rb)
1647 return;
1648
1649 unsigned num_samples = rb->Base.Base.NumSamples;
1650
1651 /* We try to avoid closing and reopening the same BO name, because the first
1652  * use of a mapping of the buffer involves a bunch of page faults, which is
1653  * moderately expensive.
1654 */
1655 struct intel_mipmap_tree *last_mt;
1656 if (num_samples == 0)
1657 last_mt = rb->mt;
1658 else
1659 last_mt = rb->singlesample_mt;
1660
1661 uint32_t old_name = 0;
1662 if (last_mt) {
1663 /* The bo already has a name because the miptree was created by a
1664 * previous call to intel_process_dri2_buffer(). If a bo already has a
1665 * name, then brw_bo_flink() is a low-cost getter. It does not
1666 * create a new name.
1667 */
1668 brw_bo_flink(last_mt->bo, &old_name);
1669 }
1670
1671 if (old_name == buffer->name)
1672 return;
1673
1674 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1675 fprintf(stderr,
1676 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1677 buffer->name, buffer->attachment,
1678 buffer->cpp, buffer->pitch);
1679 }
1680
1681 bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1682 buffer->name);
1683 if (!bo) {
1684 fprintf(stderr,
1685 "Failed to open BO for returned DRI2 buffer "
1686 "(%dx%d, %s, named %d).\n"
1687 "This is likely a bug in the X Server that will lead to a "
1688 "crash soon.\n",
1689 drawable->w, drawable->h, buffer_name, buffer->name);
1690 return;
1691 }
1692
1693 uint32_t tiling, swizzle;
1694 brw_bo_get_tiling(bo, &tiling, &swizzle);
1695
1696 struct intel_mipmap_tree *mt =
1697 intel_miptree_create_for_bo(brw,
1698 bo,
1699 intel_rb_format(rb),
1700 0,
1701 drawable->w,
1702 drawable->h,
1703 1,
1704 buffer->pitch,
1705 isl_tiling_from_i915_tiling(tiling),
1706 MIPTREE_CREATE_DEFAULT);
1707 if (!mt) {
1708 brw_bo_unreference(bo);
1709 return;
1710 }
1711
1712 /* We got this BO from X11. We can't assume that we have coherent texture
1713  * access because X may suddenly decide to use it for scan-out, which would
1714 * destroy coherency.
1715 */
1716 bo->cache_coherent = false;
1717
1718 if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1719 drawable->w, drawable->h,
1720 buffer->pitch)) {
1721 brw_bo_unreference(bo);
1722 intel_miptree_release(&mt);
1723 return;
1724 }
1725
1726 if (_mesa_is_front_buffer_drawing(fb) &&
1727 (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1728 buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1729 rb->Base.Base.NumSamples > 1) {
1730 intel_renderbuffer_upsample(brw, rb);
1731 }
1732
1733 assert(rb->mt);
1734
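   /* The miptree attached to rb above keeps the BO alive (creating a miptree
    * from a BO is assumed to take its own reference), so the local reference
    * obtained from brw_bo_gem_create_from_name() can be dropped below.
    */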
1735 brw_bo_unreference(bo);
1736 }
1737
1738 /**
1739  * \brief Bind a __DRIimage to a drawable's renderbuffer.
1740  *
1741  * Wrap the image's BO in a miptree and attach it to \p rb as the window
1742  * system buffer, skipping the work when the renderbuffer is already bound
1743  * to the same BO.
1744  *
1745  * This is called from intel_update_image_buffers().
1746  *
1747  * \param drawable Drawable whose buffer is updated.
1748  * \param rb Renderbuffer to update.
1749  * \param buffer Image obtained from the DRI image loader.
1750  * \param buffer_type Front, back, or shared role of \p buffer.
1751  * \see intel_update_image_buffers()
1752  */
1753
1754 static void
1755 intel_update_image_buffer(struct brw_context *intel,
1756 __DRIdrawable *drawable,
1757 struct intel_renderbuffer *rb,
1758 __DRIimage *buffer,
1759 enum __DRIimageBufferMask buffer_type)
1760 {
1761 struct gl_framebuffer *fb = drawable->driverPrivate;
1762
1763 if (!rb || !buffer->bo)
1764 return;
1765
1766 unsigned num_samples = rb->Base.Base.NumSamples;
1767
1768 /* Check and see if we're already bound to the right
1769 * buffer object
1770 */
1771 struct intel_mipmap_tree *last_mt;
1772 if (num_samples == 0)
1773 last_mt = rb->mt;
1774 else
1775 last_mt = rb->singlesample_mt;
1776
1777 if (last_mt && last_mt->bo == buffer->bo) {
1778 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1779 intel_miptree_make_shareable(intel, last_mt);
1780 }
1781 return;
1782 }
1783
1784 /* Only allow internal compression if samples == 0. For multisampled
1785 * window system buffers, the only thing the single-sampled buffer is used
1786 * for is as a resolve target. If we do any compression beyond what is
1787  * supported by the window system, we will just have to resolve, so it's
1788  * probably better not to bother.
1789 */
1790 const bool allow_internal_aux = (num_samples == 0);
1791
1792 struct intel_mipmap_tree *mt =
1793 intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1794 intel_rb_format(rb),
1795 allow_internal_aux);
1796 if (!mt)
1797 return;
1798
1799 if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1800 buffer->width, buffer->height,
1801 buffer->pitch)) {
1802 intel_miptree_release(&mt);
1803 return;
1804 }
1805
1806 if (_mesa_is_front_buffer_drawing(fb) &&
1807 buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1808 rb->Base.Base.NumSamples > 1) {
1809 intel_renderbuffer_upsample(intel, rb);
1810 }
1811
1812 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1813 /* The compositor and the application may access this image
1814 * concurrently. The display hardware may even scanout the image while
1815 * the GPU is rendering to it. Aux surfaces cause difficulty with
1816 * concurrent access, so permanently disable aux for this miptree.
1817 *
1818 * Perhaps we could improve overall application performance by
1819 * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1820 * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1821 * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1822 * approach to be highly dependent on the application's GL usage.
1823 *
1824 * I [chadv] expect clever disabling/reenabling to be counterproductive
1825 * in the use cases I care about: applications that render nearly
1826  * realtime handwriting to the surface while possibly undergoing
1827  * simultaneous scanout as a display plane. The app requires low
1828 * render latency. Even though the app spends most of its time in
1829 * shared-buffer mode, it also frequently transitions between
1830 * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1831  * mode. Visual stutter during the transitions should be avoided.
1832 *
1833 * In this case, I [chadv] believe reducing the GPU workload at
1834 * shared-buffer/double-buffer transitions would offer a smoother app
1835 * experience than any savings due to aux compression. But I've
1836 * collected no data to prove my theory.
1837 */
1838 intel_miptree_make_shareable(intel, mt);
1839 }
1840 }
1841
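/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * Examine the renderbuffers attached to the drawable's framebuffer to decide
 * which buffers to request, ask the image loader for them, and update each
 * attached renderbuffer from the images that are returned.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \see intel_update_renderbuffers()
 */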
1842 static void
1843 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1844 {
1845 struct gl_framebuffer *fb = drawable->driverPrivate;
1846 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1847 struct intel_renderbuffer *front_rb;
1848 struct intel_renderbuffer *back_rb;
1849 struct __DRIimageList images;
1850 mesa_format format;
1851 uint32_t buffer_mask = 0;
1852 int ret;
1853
1854 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1855 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1856
1857 if (back_rb)
1858 format = intel_rb_format(back_rb);
1859 else if (front_rb)
1860 format = intel_rb_format(front_rb);
1861 else
1862 return;
1863
1864 if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1865 _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1866 buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1867 }
1868
1869 if (back_rb)
1870 buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1871
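   /* For illustration only (assumed double-buffered case): with just a back
    * renderbuffer attached, buffer_mask is __DRI_IMAGE_BUFFER_BACK here, and
    * the loader is expected to return only images.back together with the
    * matching bit set in images.image_mask.
    */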
1872 ret = dri_screen->image.loader->getBuffers(drawable,
1873 driGLFormatToImageFormat(format),
1874 &drawable->dri2.stamp,
1875 drawable->loaderPrivate,
1876 buffer_mask,
1877 &images);
1878 if (!ret)
1879 return;
1880
1881 if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1882 drawable->w = images.front->width;
1883 drawable->h = images.front->height;
1884 intel_update_image_buffer(brw,
1885 drawable,
1886 front_rb,
1887 images.front,
1888 __DRI_IMAGE_BUFFER_FRONT);
1889 }
1890
1891 if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1892 drawable->w = images.back->width;
1893 drawable->h = images.back->height;
1894 intel_update_image_buffer(brw,
1895 drawable,
1896 back_rb,
1897 images.back,
1898 __DRI_IMAGE_BUFFER_BACK);
1899 }
1900
1901 if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1902 assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
1903 drawable->w = images.back->width;
1904 drawable->h = images.back->height;
1905 intel_update_image_buffer(brw,
1906 drawable,
1907 back_rb,
1908 images.back,
1909 __DRI_IMAGE_BUFFER_SHARED);
1910 brw->is_shared_buffer_bound = true;
1911 } else {
1912 brw->is_shared_buffer_bound = false;
1913 brw->is_shared_buffer_dirty = false;
1914 }
1915 }