i965/gen9: Optimize slice and subslice load balancing behavior.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "main/imports.h"
41 #include "main/macros.h"
42 #include "main/points.h"
43 #include "main/version.h"
44 #include "main/vtxfmt.h"
45 #include "main/texobj.h"
46 #include "main/framebuffer.h"
47 #include "main/stencil.h"
48 #include "main/state.h"
49 #include "main/spirv_extensions.h"
50
51 #include "vbo/vbo.h"
52
53 #include "drivers/common/driverfuncs.h"
54 #include "drivers/common/meta.h"
55 #include "utils.h"
56
57 #include "brw_context.h"
58 #include "brw_defines.h"
59 #include "brw_blorp.h"
60 #include "brw_draw.h"
61 #include "brw_state.h"
62
63 #include "intel_batchbuffer.h"
64 #include "intel_buffer_objects.h"
65 #include "intel_buffers.h"
66 #include "intel_fbo.h"
67 #include "intel_mipmap_tree.h"
68 #include "intel_pixel.h"
69 #include "intel_image.h"
70 #include "intel_tex.h"
71 #include "intel_tex_obj.h"
72
73 #include "swrast_setup/swrast_setup.h"
74 #include "tnl/tnl.h"
75 #include "tnl/t_pipeline.h"
76 #include "util/ralloc.h"
77 #include "util/debug.h"
78 #include "util/disk_cache.h"
79 #include "isl/isl.h"
80
81 #include "common/gen_defines.h"
82
83 #include "compiler/spirv/nir_spirv.h"
84 /***************************************
85 * Mesa's Driver Functions
86 ***************************************/
87
88 const char *const brw_vendor_string = "Intel Open Source Technology Center";
89
90 static const char *
91 get_bsw_model(const struct intel_screen *screen)
92 {
93 switch (screen->eu_total) {
94 case 16:
95 return "405";
96 case 12:
97 return "400";
98 default:
99       return "   ";
100 }
101 }
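/* Each model string above is exactly three characters long (including the
 * three-space fallback), so the 3-byte memcpy() over the "XXX" placeholder
 * in brw_get_renderer_string() below stays in bounds.
 */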
102
103 const char *
104 brw_get_renderer_string(const struct intel_screen *screen)
105 {
106 const char *chipset;
107 static char buffer[128];
108 char *bsw = NULL;
109
110 switch (screen->deviceID) {
111 #undef CHIPSET
112 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
113 #include "pci_ids/i965_pci_ids.h"
114 default:
115 chipset = "Unknown Intel Chipset";
116 break;
117 }
118
119 /* Braswell branding is funny, so we have to fix it up here */
120 if (screen->deviceID == 0x22B1) {
121 bsw = strdup(chipset);
122 char *needle = strstr(bsw, "XXX");
123 if (needle) {
124 memcpy(needle, get_bsw_model(screen), 3);
125 chipset = bsw;
126 }
127 }
128
129 (void) driGetRendererString(buffer, chipset, 0);
130 free(bsw);
131 return buffer;
132 }
133
134 static const GLubyte *
135 intel_get_string(struct gl_context * ctx, GLenum name)
136 {
137 const struct brw_context *const brw = brw_context(ctx);
138
139 switch (name) {
140 case GL_VENDOR:
141 return (GLubyte *) brw_vendor_string;
142
143 case GL_RENDERER:
144 return
145 (GLubyte *) brw_get_renderer_string(brw->screen);
146
147 default:
148 return NULL;
149 }
150 }
151
152 static void
153 brw_set_background_context(struct gl_context *ctx,
154 struct util_queue_monitoring *queue_info)
155 {
156 struct brw_context *brw = brw_context(ctx);
157 __DRIcontext *driContext = brw->driContext;
158 __DRIscreen *driScreen = driContext->driScreenPriv;
159 const __DRIbackgroundCallableExtension *backgroundCallable =
160 driScreen->dri2.backgroundCallable;
161
162 /* Note: Mesa will only call this function if we've called
163 * _mesa_enable_multithreading(). We only do that if the loader exposed
164 * the __DRI_BACKGROUND_CALLABLE extension. So we know that
165 * backgroundCallable is not NULL.
166 */
167 backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
168 }
169
170 static void
171 intel_viewport(struct gl_context *ctx)
172 {
173 struct brw_context *brw = brw_context(ctx);
174 __DRIcontext *driContext = brw->driContext;
175
176 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
177 if (driContext->driDrawablePriv)
178 dri2InvalidateDrawable(driContext->driDrawablePriv);
179 if (driContext->driReadablePriv)
180 dri2InvalidateDrawable(driContext->driReadablePriv);
181 }
182 }
183
184 static void
185 intel_update_framebuffer(struct gl_context *ctx,
186 struct gl_framebuffer *fb)
187 {
188 struct brw_context *brw = brw_context(ctx);
189
190 /* Quantize the derived default number of samples
191 */
192 fb->DefaultGeometry._NumSamples =
193 intel_quantize_num_samples(brw->screen,
194 fb->DefaultGeometry.NumSamples);
195 }
196
197 static void
198 intel_update_state(struct gl_context * ctx)
199 {
200 GLuint new_state = ctx->NewState;
201 struct brw_context *brw = brw_context(ctx);
202
203 if (ctx->swrast_context)
204 _swrast_InvalidateState(ctx, new_state);
205
206 brw->NewGLState |= new_state;
207
208 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
209 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
210
211 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
212 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
213 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
214 brw->stencil_write_enabled =
215 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
216 }
217
218 if (new_state & _NEW_POLYGON)
219 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
220
221 if (new_state & _NEW_BUFFERS) {
222 intel_update_framebuffer(ctx, ctx->DrawBuffer);
223 if (ctx->DrawBuffer != ctx->ReadBuffer)
224 intel_update_framebuffer(ctx, ctx->ReadBuffer);
225 }
226 }
227
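/* Convenience macro: picks whichever flushFrontBuffer hook the screen was
 * set up with, the image loader's if present, otherwise the DRI2 loader's.
 */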
228 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
229
230 static void
231 intel_flush_front(struct gl_context *ctx)
232 {
233 struct brw_context *brw = brw_context(ctx);
234 __DRIcontext *driContext = brw->driContext;
235 __DRIdrawable *driDrawable = driContext->driDrawablePriv;
236 __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
237
238 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
239 if (flushFront(dri_screen) && driDrawable &&
240 driDrawable->loaderPrivate) {
241
242 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
243 *
244 * This potentially resolves both front and back buffer. It
245 * is unnecessary to resolve the back, but harms nothing except
246 * performance. And no one cares about front-buffer render
247 * performance.
248 */
249 intel_resolve_for_dri2_flush(brw, driDrawable);
250 intel_batchbuffer_flush(brw);
251
252 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
253
254 /* We set the dirty bit in intel_prepare_render() if we're
255 * front buffer rendering once we get there.
256 */
257 brw->front_buffer_dirty = false;
258 }
259 }
260 }
261
262 static void
263 brw_display_shared_buffer(struct brw_context *brw)
264 {
265 __DRIcontext *dri_context = brw->driContext;
266 __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
267 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
268 int fence_fd = -1;
269
270 if (!brw->is_shared_buffer_bound)
271 return;
272
273 if (!brw->is_shared_buffer_dirty)
274 return;
275
276 if (brw->screen->has_exec_fence) {
277 /* This function is always called during a flush operation, so there is
278 * no need to flush again here. But we want to provide a fence_fd to the
279 * loader, and a redundant flush is the easiest way to acquire one.
280 */
281 if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
282 return;
283 }
284
285 dri_screen->mutableRenderBuffer.loader
286 ->displaySharedBuffer(dri_drawable, fence_fd,
287 dri_drawable->loaderPrivate);
288 brw->is_shared_buffer_dirty = false;
289 }
290
291 static void
292 intel_glFlush(struct gl_context *ctx)
293 {
294 struct brw_context *brw = brw_context(ctx);
295
296 intel_batchbuffer_flush(brw);
297 intel_flush_front(ctx);
298 brw_display_shared_buffer(brw);
299 brw->need_flush_throttle = true;
300 }
301
302 static void
303 intel_finish(struct gl_context * ctx)
304 {
305 struct brw_context *brw = brw_context(ctx);
306
307 intel_glFlush(ctx);
308
309 if (brw->batch.last_bo)
310 brw_bo_wait_rendering(brw->batch.last_bo);
311 }
312
313 static void
314 brw_init_driver_functions(struct brw_context *brw,
315 struct dd_function_table *functions)
316 {
317 const struct gen_device_info *devinfo = &brw->screen->devinfo;
318
319 _mesa_init_driver_functions(functions);
320
321 /* GLX uses DRI2 invalidate events to handle window resizing.
322 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
323 * which doesn't provide a mechanism for snooping the event queues.
324 *
325 * So EGL still relies on viewport hacks to handle window resizing.
326 * This should go away with DRI3000.
327 */
328 if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
329 functions->Viewport = intel_viewport;
330
331 functions->Flush = intel_glFlush;
332 functions->Finish = intel_finish;
333 functions->GetString = intel_get_string;
334 functions->UpdateState = intel_update_state;
335
336 brw_init_draw_functions(functions);
337 intelInitTextureFuncs(functions);
338 intelInitTextureImageFuncs(functions);
339 intelInitTextureCopyImageFuncs(functions);
340 intelInitCopyImageFuncs(functions);
341 intelInitClearFuncs(functions);
342 intelInitBufferFuncs(functions);
343 intelInitPixelFuncs(functions);
344 intelInitBufferObjectFuncs(functions);
345 brw_init_syncobj_functions(functions);
346 brw_init_object_purgeable_functions(functions);
347
348 brwInitFragProgFuncs( functions );
349 brw_init_common_queryobj_functions(functions);
350 if (devinfo->gen >= 8 || devinfo->is_haswell)
351 hsw_init_queryobj_functions(functions);
352 else if (devinfo->gen >= 6)
353 gen6_init_queryobj_functions(functions);
354 else
355 gen4_init_queryobj_functions(functions);
356 brw_init_compute_functions(functions);
357 brw_init_conditional_render_functions(functions);
358
359 functions->GenerateMipmap = brw_generate_mipmap;
360
361 functions->QueryInternalFormat = brw_query_internal_format;
362
363 functions->NewTransformFeedback = brw_new_transform_feedback;
364 functions->DeleteTransformFeedback = brw_delete_transform_feedback;
365 if (can_do_mi_math_and_lrr(brw->screen)) {
366 functions->BeginTransformFeedback = hsw_begin_transform_feedback;
367 functions->EndTransformFeedback = hsw_end_transform_feedback;
368 functions->PauseTransformFeedback = hsw_pause_transform_feedback;
369 functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
370 } else if (devinfo->gen >= 7) {
371 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
372 functions->EndTransformFeedback = gen7_end_transform_feedback;
373 functions->PauseTransformFeedback = gen7_pause_transform_feedback;
374 functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
375 functions->GetTransformFeedbackVertexCount =
376 brw_get_transform_feedback_vertex_count;
377 } else {
378 functions->BeginTransformFeedback = brw_begin_transform_feedback;
379 functions->EndTransformFeedback = brw_end_transform_feedback;
380 functions->PauseTransformFeedback = brw_pause_transform_feedback;
381 functions->ResumeTransformFeedback = brw_resume_transform_feedback;
382 functions->GetTransformFeedbackVertexCount =
383 brw_get_transform_feedback_vertex_count;
384 }
385
386 if (devinfo->gen >= 6)
387 functions->GetSamplePosition = gen6_get_sample_position;
388
389 /* GL_ARB_get_program_binary */
390 brw_program_binary_init(brw->screen->deviceID);
391 functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
392 functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
393 functions->ProgramBinaryDeserializeDriverBlob =
394 brw_deserialize_program_binary;
395
396 if (brw->screen->disk_cache) {
397 functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
398 }
399
400 functions->SetBackgroundContext = brw_set_background_context;
401 }
402
403 static void
404 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
405 {
406 const struct gen_device_info *devinfo = &brw->screen->devinfo;
407 struct gl_context *ctx = &brw->ctx;
408
409 /* The following SPIR-V capabilities are only supported on gen7+. In theory
410 * you should enable the extension only on gen7+, but just in case let's
411 * assert it.
412 */
413 assert(devinfo->gen >= 7);
414
415 ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
416 ctx->Const.SpirVCapabilities.draw_parameters = true;
417 ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
418 ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
419 ctx->Const.SpirVCapabilities.image_write_without_format = true;
420 ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
421 ctx->Const.SpirVCapabilities.tessellation = true;
422 ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
423 ctx->Const.SpirVCapabilities.variable_pointers = true;
424 }
425
426 static void
427 brw_initialize_context_constants(struct brw_context *brw)
428 {
429 const struct gen_device_info *devinfo = &brw->screen->devinfo;
430 struct gl_context *ctx = &brw->ctx;
431 const struct brw_compiler *compiler = brw->screen->compiler;
432
433 const bool stage_exists[MESA_SHADER_STAGES] = {
434 [MESA_SHADER_VERTEX] = true,
435 [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
436 [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
437 [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
438 [MESA_SHADER_FRAGMENT] = true,
439 [MESA_SHADER_COMPUTE] =
440 (_mesa_is_desktop_gl(ctx) &&
441 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
442 (ctx->API == API_OPENGLES2 &&
443 ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
444 };
445
446 unsigned num_stages = 0;
447 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
448 if (stage_exists[i])
449 num_stages++;
450 }
451
452 unsigned max_samplers =
453 devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;
454
455 ctx->Const.MaxDualSourceDrawBuffers = 1;
456 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
457 ctx->Const.MaxCombinedShaderOutputResources =
458 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
459
460 /* The timestamp register we can read for glGetTimestamp() is
461 * sometimes only 32 bits, before scaling to nanoseconds (depending
462 * on kernel).
463 *
464 * Once scaled to nanoseconds the timestamp would roll over at a
465 * non-power-of-two, so an application couldn't use
466 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
467 * report 36 bits and truncate at that (rolling over 5 times as
468 * often as the HW counter), and when the 32-bit counter rolls
469 * over, it happens to also be at a rollover in the reported value
470 * from near (1<<36) to 0.
471 *
472  * The low 32 bits roll over in ~343 seconds. Our 36-bit result
473 * rolls over every ~69 seconds.
474 */
475 ctx->Const.QueryCounterBits.Timestamp = 36;
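/* Rough arithmetic behind the figures above, for illustration: 2^36 ns is
 * about 68.7 seconds, and 343 s spread over 2^32 ticks implies a timestamp
 * tick of roughly 80 ns, so the truncated 36-bit value wraps about five
 * times as often as the raw 32-bit hardware counter.
 */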
476
477 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
478 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
479 if (devinfo->gen >= 7) {
480 ctx->Const.MaxRenderbufferSize = 16384;
481 ctx->Const.MaxTextureSize = 16384;
482 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
483 } else {
484 ctx->Const.MaxRenderbufferSize = 8192;
485 ctx->Const.MaxTextureSize = 8192;
486 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
487 }
488 ctx->Const.Max3DTextureLevels = 12; /* 2048 */
489 ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
490 ctx->Const.MaxTextureMbytes = 1536;
491 ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
492 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
493 ctx->Const.MaxTextureLodBias = 15.0;
494 ctx->Const.StripTextureBorder = true;
495 if (devinfo->gen >= 7) {
496 ctx->Const.MaxProgramTextureGatherComponents = 4;
497 ctx->Const.MinProgramTextureGatherOffset = -32;
498 ctx->Const.MaxProgramTextureGatherOffset = 31;
499 } else if (devinfo->gen == 6) {
500 ctx->Const.MaxProgramTextureGatherComponents = 1;
501 ctx->Const.MinProgramTextureGatherOffset = -8;
502 ctx->Const.MaxProgramTextureGatherOffset = 7;
503 }
504
505 ctx->Const.MaxUniformBlockSize = 65536;
506
507 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
508 struct gl_program_constants *prog = &ctx->Const.Program[i];
509
510 if (!stage_exists[i])
511 continue;
512
513 prog->MaxTextureImageUnits = max_samplers;
514
515 prog->MaxUniformBlocks = BRW_MAX_UBO;
516 prog->MaxCombinedUniformComponents =
517 prog->MaxUniformComponents +
518 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
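/* MaxUniformBlockSize is set to 65536 bytes above, so each UBO binding adds
 * 65536 / 4 = 16384 components here on top of the default uniform block's
 * MaxUniformComponents.
 */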
519
520 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
521 prog->MaxAtomicBuffers = BRW_MAX_ABO;
522 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
523 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
524 }
525
526 ctx->Const.MaxTextureUnits =
527 MIN2(ctx->Const.MaxTextureCoordUnits,
528 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
529
530 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
531 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
532 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
533 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
534 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
535 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
536 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
537
538
539 /* Hardware only supports a limited number of transform feedback buffers.
540 * So we need to override the Mesa default (which is based only on software
541 * limits).
542 */
543 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
544
545 /* On Gen6, in the worst case, we use up one binding table entry per
546 * transform feedback component (see comments above the definition of
547 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
548 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
549 * BRW_MAX_SOL_BINDINGS.
550 *
551 * In "separate components" mode, we need to divide this value by
552 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
553 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
554 */
555 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
556 ctx->Const.MaxTransformFeedbackSeparateComponents =
557 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
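/* Purely illustrative numbers: if BRW_MAX_SOL_BINDINGS were 64 and
 * BRW_MAX_SOL_BUFFERS were 4 (see brw_context.h for the real values), we
 * would advertise 64 interleaved components but only 64 / 4 = 16 separate
 * components, keeping 4 buffers * 16 components within the binding table
 * entries reserved for SOL.
 */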
558
559 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
560 !can_do_mi_math_and_lrr(brw->screen);
561
562 int max_samples;
563 const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
564 const int clamp_max_samples =
565 driQueryOptioni(&brw->optionCache, "clamp_max_samples");
566
567 if (clamp_max_samples < 0) {
568 max_samples = msaa_modes[0];
569 } else {
570 /* Select the largest supported MSAA mode that does not exceed
571 * clamp_max_samples.
572 */
573 max_samples = 0;
574 for (int i = 0; msaa_modes[i] != 0; ++i) {
575 if (msaa_modes[i] <= clamp_max_samples) {
576 max_samples = msaa_modes[i];
577 break;
578 }
579 }
580 }
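/* This relies on intel_supported_msaa_modes() listing modes in decreasing
 * order (msaa_modes[0] is the largest), so breaking at the first mode that
 * does not exceed clamp_max_samples picks the largest acceptable one.
 */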
581
582 ctx->Const.MaxSamples = max_samples;
583 ctx->Const.MaxColorTextureSamples = max_samples;
584 ctx->Const.MaxDepthTextureSamples = max_samples;
585 ctx->Const.MaxIntegerSamples = max_samples;
586 ctx->Const.MaxImageSamples = 0;
587
588 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
589 * to map indices of rectangular grid to sample numbers within a pixel.
590 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
591 * extension implementation. For more details see the comment above
592 * gen6_set_sample_maps() definition.
593 */
594 gen6_set_sample_maps(ctx);
595
596 ctx->Const.MinLineWidth = 1.0;
597 ctx->Const.MinLineWidthAA = 1.0;
598 if (devinfo->gen >= 6) {
599 ctx->Const.MaxLineWidth = 7.375;
600 ctx->Const.MaxLineWidthAA = 7.375;
601 ctx->Const.LineWidthGranularity = 0.125;
602 } else {
603 ctx->Const.MaxLineWidth = 7.0;
604 ctx->Const.MaxLineWidthAA = 7.0;
605 ctx->Const.LineWidthGranularity = 0.5;
606 }
607
608 /* For non-antialiased lines, we have to round the line width to the
609 * nearest whole number. Make sure that we don't advertise a line
610 * width that, when rounded, will be beyond the actual hardware
611 * maximum.
612 */
613 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
614
615 ctx->Const.MinPointSize = 1.0;
616 ctx->Const.MinPointSizeAA = 1.0;
617 ctx->Const.MaxPointSize = 255.0;
618 ctx->Const.MaxPointSizeAA = 255.0;
619 ctx->Const.PointSizeGranularity = 1.0;
620
621 if (devinfo->gen >= 5 || devinfo->is_g4x)
622 ctx->Const.MaxClipPlanes = 8;
623
624 ctx->Const.GLSLFragCoordIsSysVal = true;
625 ctx->Const.GLSLFrontFacingIsSysVal = true;
626 ctx->Const.GLSLTessLevelsAsInputs = true;
627 ctx->Const.PrimitiveRestartForPatches = true;
628
629 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
630 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
631 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
632 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
633 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
634 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
635 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
636 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
637 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
638 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
639 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
640 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
641 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
642 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
643
644 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
645 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
646 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
647 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
648 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
649 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
650 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
651 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
652 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
653 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
654 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
655
656 /* Fragment shaders use real, 32-bit twos-complement integers for all
657 * integer types.
658 */
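/* These values feed glGetShaderPrecisionFormat(): the range fields are log2
 * magnitudes, so 31/30 describes roughly [-2^31, 2^31 - 1], and a precision
 * of 0 means integers are represented exactly.
 */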
659 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
660 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
661 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
662 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
663 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
664
665 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
666 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
667 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
668 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
669 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
670
671    /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
672     * but we're not sure what vertex order the conversion produces, and
673     * that affects the provoking vertex decision. Always use the last-vertex
674     * convention for quad primitives; it works as expected for now.
675 */
676 if (devinfo->gen >= 6)
677 ctx->Const.QuadsFollowProvokingVertexConvention = false;
678
679 ctx->Const.NativeIntegers = true;
680
681 /* Regarding the CMP instruction, the Ivybridge PRM says:
682 *
683 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
684 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
685 * 0xFFFFFFFF) is assigned to dst."
686 *
687 * but PRMs for earlier generations say
688 *
689 * "In dword format, one GRF may store up to 8 results. When the register
690 * is used later as a vector of Booleans, as only LSB at each channel
691 * contains meaning [sic] data, software should make sure all higher bits
692 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
693 *
694 * We select the representation of a true boolean uniform to be ~0, and fix
695 * the results of Gen <= 5 CMP instruction's with -(result & 1).
696 */
697 ctx->Const.UniformBooleanTrue = ~0;
698
699 /* From the gen4 PRM, volume 4 page 127:
700 *
701 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
702 * the base address of the first element of the surface, computed in
703 * software by adding the surface base address to the byte offset of
704 * the element in the buffer."
705 *
706 * However, unaligned accesses are slower, so enforce buffer alignment.
707 *
708 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
709 * restriction: the start of the buffer needs to be 32B aligned.
710 */
711 ctx->Const.UniformBufferOffsetAlignment = 32;
712
713 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
714 * that we can safely have the CPU and GPU writing the same SSBO on
715 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
716 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
717 * be updating disjoint regions of the buffer simultaneously and that will
718 * break if the regions overlap the same cacheline.
719 */
720 ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
721 ctx->Const.TextureBufferOffsetAlignment = 16;
722 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
723
724 if (devinfo->gen >= 6) {
725 ctx->Const.MaxVarying = 32;
726 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
727 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
728 compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
729 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
730 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
731 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
732 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
733 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
734 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
735 }
736
737 /* We want the GLSL compiler to emit code that uses condition codes */
738 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
739 ctx->Const.ShaderCompilerOptions[i] =
740 brw->screen->compiler->glsl_compiler_options[i];
741 }
742
743 if (devinfo->gen >= 7) {
744 ctx->Const.MaxViewportWidth = 32768;
745 ctx->Const.MaxViewportHeight = 32768;
746 }
747
748 /* ARB_viewport_array, OES_viewport_array */
749 if (devinfo->gen >= 6) {
750 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
751 ctx->Const.ViewportSubpixelBits = 8;
752
753 /* Cast to float before negating because MaxViewportWidth is unsigned.
754 */
755 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
756 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
757 }
758
759 /* ARB_gpu_shader5 */
760 if (devinfo->gen >= 7)
761 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
762
763 /* ARB_framebuffer_no_attachments */
764 ctx->Const.MaxFramebufferWidth = 16384;
765 ctx->Const.MaxFramebufferHeight = 16384;
766 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
767 ctx->Const.MaxFramebufferSamples = max_samples;
768
769 /* OES_primitive_bounding_box */
770 ctx->Const.NoPrimitiveBoundingBoxOutput = true;
771
772 /* TODO: We should be able to use STD430 packing by default on all hardware
773 * but some piglit tests [1] currently fail on SNB when this is enabled.
774 * The problem is the messages we're using for doing uniform pulls
775 * in the vec4 back-end on SNB is the OWORD block load instruction, which
776 * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the
777 * sampler which doesn't have these restrictions.
778 *
779 * In the scalar back-end, we use the sampler for dynamic uniform loads and
780 * pull an entire cache line at a time for constant offset loads both of
781 * which support almost any alignment.
782 *
783 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
784 */
785 if (devinfo->gen >= 7)
786 ctx->Const.UseSTD430AsDefaultPacking = true;
787
788 if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
789 ctx->Const.AllowMappedBuffersDuringExecution = true;
790
791 /* GL_ARB_get_program_binary */
792 ctx->Const.NumProgramBinaryFormats = 1;
793 }
794
795 static void
796 brw_initialize_cs_context_constants(struct brw_context *brw)
797 {
798 struct gl_context *ctx = &brw->ctx;
799 const struct intel_screen *screen = brw->screen;
800 struct gen_device_info *devinfo = &brw->screen->devinfo;
801
802 /* FINISHME: Do this for all platforms that the kernel supports */
803 if (devinfo->is_cherryview &&
804 screen->subslice_total > 0 && screen->eu_total > 0) {
805 /* Logical CS threads = EUs per subslice * 7 threads per EU */
806 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
807
808 /* Fuse configurations may give more threads than expected, never less. */
809 if (max_cs_threads > devinfo->max_cs_threads)
810 devinfo->max_cs_threads = max_cs_threads;
811 }
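/* Hypothetical fused-down example: 12 EUs spread across 2 subslices would
 * give (12 / 2) * 7 = 42 logical threads, and that value only replaces
 * devinfo->max_cs_threads when it is larger.
 */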
812
813 /* Maximum number of scalar compute shader invocations that can be run in
814 * parallel in the same subslice assuming SIMD32 dispatch.
815 *
816 * We don't advertise more than 64 threads, because we are limited to 64 by
817 * our usage of thread_width_max in the gpgpu walker command. This only
818 * currently impacts Haswell, which otherwise might be able to advertise 70
819 * threads. With SIMD32 and 64 threads, Haswell still provides twice the
820     * number of invocations required by ARB_compute_shader.
821 */
822 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
823 const uint32_t max_invocations = 32 * max_threads;
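/* With the 64-thread cap this works out to 32 * 64 = 2048 invocations, twice
 * the 1024 required for the desktop GL compute path and well above the 128
 * required for the ES 3.1 path checked in brw_initialize_context_constants().
 */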
824 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
825 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
826 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
827 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
828 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
829 }
830
831 /**
832 * Process driconf (drirc) options, setting appropriate context flags.
833 *
834 * intelInitExtensions still pokes at optionCache directly, in order to
835 * avoid advertising various extensions. No flags are set, so it makes
836 * sense to continue doing that there.
837 */
838 static void
839 brw_process_driconf_options(struct brw_context *brw)
840 {
841 const struct gen_device_info *devinfo = &brw->screen->devinfo;
842 struct gl_context *ctx = &brw->ctx;
843
844 driOptionCache *options = &brw->optionCache;
845 driParseConfigFiles(options, &brw->screen->optionCache,
846 brw->driContext->driScreenPriv->myNum,
847 "i965", NULL);
848
849 int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
850 switch (bo_reuse_mode) {
851 case DRI_CONF_BO_REUSE_DISABLED:
852 break;
853 case DRI_CONF_BO_REUSE_ALL:
854 brw_bufmgr_enable_reuse(brw->bufmgr);
855 break;
856 }
857
858 if (INTEL_DEBUG & DEBUG_NO_HIZ) {
859 brw->has_hiz = false;
860 /* On gen6, you can only do separate stencil with HIZ. */
861 if (devinfo->gen == 6)
862 brw->has_separate_stencil = false;
863 }
864
865 if (driQueryOptionb(options, "mesa_no_error"))
866 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
867
868 if (driQueryOptionb(options, "always_flush_batch")) {
869 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
870 brw->always_flush_batch = true;
871 }
872
873 if (driQueryOptionb(options, "always_flush_cache")) {
874 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
875 brw->always_flush_cache = true;
876 }
877
878 if (driQueryOptionb(options, "disable_throttling")) {
879 fprintf(stderr, "disabling flush throttling\n");
880 brw->disable_throttling = true;
881 }
882
883 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
884
885 if (driQueryOptionb(&brw->optionCache, "precise_trig"))
886 brw->screen->compiler->precise_trig = true;
887
888 ctx->Const.ForceGLSLExtensionsWarn =
889 driQueryOptionb(options, "force_glsl_extensions_warn");
890
891 ctx->Const.ForceGLSLVersion =
892 driQueryOptioni(options, "force_glsl_version");
893
894 ctx->Const.DisableGLSLLineContinuations =
895 driQueryOptionb(options, "disable_glsl_line_continuations");
896
897 ctx->Const.AllowGLSLExtensionDirectiveMidShader =
898 driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
899
900 ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
901 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
902
903 ctx->Const.AllowHigherCompatVersion =
904 driQueryOptionb(options, "allow_higher_compat_version");
905
906 ctx->Const.ForceGLSLAbsSqrt =
907 driQueryOptionb(options, "force_glsl_abs_sqrt");
908
909 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
910
911 brw->dual_color_blend_by_location =
912 driQueryOptionb(options, "dual_color_blend_by_location");
913
914 ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
915 driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
916
917 ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
918 driComputeOptionsSha1(&brw->screen->optionCache,
919 ctx->Const.dri_config_options_sha1);
920 }
921
922 GLboolean
923 brwCreateContext(gl_api api,
924 const struct gl_config *mesaVis,
925 __DRIcontext *driContextPriv,
926 const struct __DriverContextConfig *ctx_config,
927 unsigned *dri_ctx_error,
928 void *sharedContextPrivate)
929 {
930 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
931 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
932 const struct gen_device_info *devinfo = &screen->devinfo;
933 struct dd_function_table functions;
934
935 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
936 * provides us with context reset notifications.
937 */
938 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
939 __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
940 __DRI_CTX_FLAG_NO_ERROR;
941
942 if (screen->has_context_reset_notification)
943 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
944
945 if (ctx_config->flags & ~allowed_flags) {
946 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
947 return false;
948 }
949
950 if (ctx_config->attribute_mask &
951 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
952 __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
953 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
954 return false;
955 }
956
957 bool notify_reset =
958 ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
959 ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
960
961 struct brw_context *brw = rzalloc(NULL, struct brw_context);
962 if (!brw) {
963 fprintf(stderr, "%s: failed to alloc context\n", __func__);
964 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
965 return false;
966 }
967 brw->perf_ctx = gen_perf_new_context(brw);
968
969 driContextPriv->driverPrivate = brw;
970 brw->driContext = driContextPriv;
971 brw->screen = screen;
972 brw->bufmgr = screen->bufmgr;
973
974 brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
975 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
976
977 brw->has_swizzling = screen->hw_has_swizzling;
978
979 brw->isl_dev = screen->isl_dev;
980
981 brw->vs.base.stage = MESA_SHADER_VERTEX;
982 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
983 brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
984 brw->gs.base.stage = MESA_SHADER_GEOMETRY;
985 brw->wm.base.stage = MESA_SHADER_FRAGMENT;
986 brw->cs.base.stage = MESA_SHADER_COMPUTE;
987
988 brw_init_driver_functions(brw, &functions);
989
990 if (notify_reset)
991 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
992
993 brw_process_driconf_options(brw);
994
995 if (api == API_OPENGL_CORE &&
996 driQueryOptionb(&screen->optionCache, "force_compat_profile")) {
997 api = API_OPENGL_COMPAT;
998 }
999
1000 struct gl_context *ctx = &brw->ctx;
1001
1002 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
1003 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1004 fprintf(stderr, "%s: failed to init mesa context\n", __func__);
1005 intelDestroyContext(driContextPriv);
1006 return false;
1007 }
1008
1009 driContextSetFlags(ctx, ctx_config->flags);
1010
1011 /* Initialize the software rasterizer and helper modules.
1012 *
1013 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1014 * software fallbacks (which we have to support on legacy GL to do weird
1015 * glDrawPixels(), glBitmap(), and other functions).
1016 */
1017 if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1018 _swrast_CreateContext(ctx);
1019 }
1020
1021 _vbo_CreateContext(ctx);
1022 if (ctx->swrast_context) {
1023 _tnl_CreateContext(ctx);
1024 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1025 _swsetup_CreateContext(ctx);
1026
1027 /* Configure swrast to match hardware characteristics: */
1028 _swrast_allow_pixel_fog(ctx, false);
1029 _swrast_allow_vertex_fog(ctx, true);
1030 }
1031
1032 _mesa_meta_init(ctx);
1033
1034 if (INTEL_DEBUG & DEBUG_PERF)
1035 brw->perf_debug = true;
1036
1037 brw_initialize_cs_context_constants(brw);
1038 brw_initialize_context_constants(brw);
1039
1040 ctx->Const.ResetStrategy = notify_reset
1041 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1042
1043 /* Reinitialize the context point state. It depends on ctx->Const values. */
1044 _mesa_init_point(ctx);
1045
1046 intel_fbo_init(brw);
1047
1048 intel_batchbuffer_init(brw);
1049
1050 /* Create a new hardware context. Using a hardware context means that
1051 * our GPU state will be saved/restored on context switch, allowing us
1052 * to assume that the GPU is in the same state we left it in.
1053 *
1054 * This is required for transform feedback buffer offsets, query objects,
1055 * and also allows us to reduce how much state we have to emit.
1056 */
1057 brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1058 if (!brw->hw_ctx && devinfo->gen >= 6) {
1059 fprintf(stderr, "Failed to create hardware context.\n");
1060 intelDestroyContext(driContextPriv);
1061 return false;
1062 }
1063
1064 if (brw->hw_ctx) {
1065 int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1066 if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1067 switch (ctx_config->priority) {
1068 case __DRI_CTX_PRIORITY_LOW:
1069 hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1070 break;
1071 case __DRI_CTX_PRIORITY_HIGH:
1072 hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1073 break;
1074 }
1075 }
1076 if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1077 brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1078 fprintf(stderr,
1079 "Failed to set priority [%d:%d] for hardware context.\n",
1080 ctx_config->priority, hw_priority);
1081 intelDestroyContext(driContextPriv);
1082 return false;
1083 }
1084 }
1085
1086 if (brw_init_pipe_control(brw, devinfo)) {
1087 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1088 intelDestroyContext(driContextPriv);
1089 return false;
1090 }
1091
1092 brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1093
1094 brw_init_state(brw);
1095
1096 intelInitExtensions(ctx);
1097
1098 brw_init_surface_formats(brw);
1099
1100 brw_blorp_init(brw);
1101
1102 brw->urb.size = devinfo->urb.size;
1103
1104 if (devinfo->gen == 6)
1105 brw->urb.gs_present = false;
1106
1107 brw->prim_restart.in_progress = false;
1108 brw->prim_restart.enable_cut_index = false;
1109 brw->gs.enabled = false;
1110 brw->clip.viewport_count = 1;
1111
1112 brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1113
1114 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1115
1116 ctx->VertexProgram._MaintainTnlProgram = true;
1117 ctx->FragmentProgram._MaintainTexEnvProgram = true;
1118
1119 brw_draw_init( brw );
1120
1121 if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1122 /* Turn on some extra GL_ARB_debug_output generation. */
1123 brw->perf_debug = true;
1124 }
1125
1126 if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1127 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1128 ctx->Const.RobustAccess = GL_TRUE;
1129 }
1130
1131 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1132 brw_init_shader_time(brw);
1133
1134 _mesa_override_extensions(ctx);
1135 _mesa_compute_version(ctx);
1136
1137 /* GL_ARB_gl_spirv */
1138 if (ctx->Extensions.ARB_gl_spirv) {
1139 brw_initialize_spirv_supported_capabilities(brw);
1140
1141 if (ctx->Extensions.ARB_spirv_extensions) {
1142 /* GL_ARB_spirv_extensions */
1143 ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
1144 _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
1145 &ctx->Const.SpirVCapabilities);
1146 }
1147 }
1148
1149 _mesa_initialize_dispatch_tables(ctx);
1150 _mesa_initialize_vbo_vtxfmt(ctx);
1151
1152 if (ctx->Extensions.INTEL_performance_query)
1153 brw_init_performance_queries(brw);
1154
1155 vbo_use_buffer_objects(ctx);
1156 vbo_always_unmap_buffers(ctx);
1157
1158 brw->ctx.Cache = brw->screen->disk_cache;
1159
1160 if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1161 driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1162 /* Loader supports multithreading, and so do we. */
1163 _mesa_glthread_init(ctx);
1164 }
1165
1166 return true;
1167 }
1168
1169 void
1170 intelDestroyContext(__DRIcontext * driContextPriv)
1171 {
1172 struct brw_context *brw =
1173 (struct brw_context *) driContextPriv->driverPrivate;
1174 struct gl_context *ctx = &brw->ctx;
1175
1176 GET_CURRENT_CONTEXT(curctx);
1177
1178 if (curctx == NULL) {
1179 /* No current context, but we need one to release
1180 * renderbuffer surface when we release framebuffer.
1181 * So temporarily bind the context.
1182 */
1183 _mesa_make_current(ctx, NULL, NULL);
1184 }
1185
1186 _mesa_glthread_destroy(&brw->ctx);
1187
1188 _mesa_meta_free(&brw->ctx);
1189
1190 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1191 /* Force a report. */
1192 brw->shader_time.report_time = 0;
1193
1194 brw_collect_and_report_shader_time(brw);
1195 brw_destroy_shader_time(brw);
1196 }
1197
1198 blorp_finish(&brw->blorp);
1199
1200 brw_destroy_state(brw);
1201 brw_draw_destroy(brw);
1202
1203 brw_bo_unreference(brw->curbe.curbe_bo);
1204
1205 brw_bo_unreference(brw->vs.base.scratch_bo);
1206 brw_bo_unreference(brw->tcs.base.scratch_bo);
1207 brw_bo_unreference(brw->tes.base.scratch_bo);
1208 brw_bo_unreference(brw->gs.base.scratch_bo);
1209 brw_bo_unreference(brw->wm.base.scratch_bo);
1210
1211 brw_bo_unreference(brw->vs.base.push_const_bo);
1212 brw_bo_unreference(brw->tcs.base.push_const_bo);
1213 brw_bo_unreference(brw->tes.base.push_const_bo);
1214 brw_bo_unreference(brw->gs.base.push_const_bo);
1215 brw_bo_unreference(brw->wm.base.push_const_bo);
1216
1217 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1218
1219 if (ctx->swrast_context) {
1220 _swsetup_DestroyContext(&brw->ctx);
1221 _tnl_DestroyContext(&brw->ctx);
1222 }
1223 _vbo_DestroyContext(&brw->ctx);
1224
1225 if (ctx->swrast_context)
1226 _swrast_DestroyContext(&brw->ctx);
1227
1228 brw_fini_pipe_control(brw);
1229 intel_batchbuffer_free(&brw->batch);
1230
1231 brw_bo_unreference(brw->throttle_batch[1]);
1232 brw_bo_unreference(brw->throttle_batch[0]);
1233 brw->throttle_batch[1] = NULL;
1234 brw->throttle_batch[0] = NULL;
1235
1236 driDestroyOptionCache(&brw->optionCache);
1237
1238 /* free the Mesa context */
1239 _mesa_free_context_data(&brw->ctx, true);
1240
1241 ralloc_free(brw);
1242 driContextPriv->driverPrivate = NULL;
1243 }
1244
1245 GLboolean
1246 intelUnbindContext(__DRIcontext * driContextPriv)
1247 {
1248 GET_CURRENT_CONTEXT(ctx);
1249 _mesa_glthread_finish(ctx);
1250
1251    /* Unset current context and dispatch table */
1252 _mesa_make_current(NULL, NULL, NULL);
1253
1254 return true;
1255 }
1256
1257 /**
1258  * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1259 * on window system framebuffers.
1260 *
1261 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1262 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1263 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1264 * for a visual where you're guaranteed to be capable, but it turns out that
1265 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1266 * incapable ones, because there's no difference between the two in resources
1267 * used. Applications thus get built that accidentally rely on the default
1268 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1269 * great...
1270 *
1271 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1272 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1273 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1274 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1275 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1276 * and get no sRGB encode (assuming that both kinds of visual are available).
1277 * Thus our choice to support sRGB by default on our visuals for desktop would
1278 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1279 *
1280 * Unfortunately, renderbuffer setup happens before a context is created. So
1281 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1282 * context (without an sRGB visual), we go turn that back off before anyone
1283 * finds out.
1284 */
1285 static void
1286 intel_gles3_srgb_workaround(struct brw_context *brw,
1287 struct gl_framebuffer *fb)
1288 {
1289 struct gl_context *ctx = &brw->ctx;
1290
1291 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1292 return;
1293
1294 for (int i = 0; i < BUFFER_COUNT; i++) {
1295 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1296
1297 /* Check if sRGB was specifically asked for. */
1298 struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1299 if (irb && irb->need_srgb)
1300 return;
1301
1302 if (rb)
1303 rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1304 }
1305 /* Disable sRGB from framebuffers that are not compatible. */
1306 fb->Visual.sRGBCapable = false;
1307 }
1308
1309 GLboolean
1310 intelMakeCurrent(__DRIcontext * driContextPriv,
1311 __DRIdrawable * driDrawPriv,
1312 __DRIdrawable * driReadPriv)
1313 {
1314 struct brw_context *brw;
1315
1316 if (driContextPriv)
1317 brw = (struct brw_context *) driContextPriv->driverPrivate;
1318 else
1319 brw = NULL;
1320
1321 if (driContextPriv) {
1322 struct gl_context *ctx = &brw->ctx;
1323 struct gl_framebuffer *fb, *readFb;
1324
1325 if (driDrawPriv == NULL) {
1326 fb = _mesa_get_incomplete_framebuffer();
1327 } else {
1328 fb = driDrawPriv->driverPrivate;
1329 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1330 }
1331
1332 if (driReadPriv == NULL) {
1333 readFb = _mesa_get_incomplete_framebuffer();
1334 } else {
1335 readFb = driReadPriv->driverPrivate;
1336 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1337 }
1338
1339 /* The sRGB workaround changes the renderbuffer's format. We must change
1340        * the format before the renderbuffer's miptree gets allocated, otherwise
1341 * the formats of the renderbuffer and its miptree will differ.
1342 */
1343 intel_gles3_srgb_workaround(brw, fb);
1344 intel_gles3_srgb_workaround(brw, readFb);
1345
1346 /* If the context viewport hasn't been initialized, force a call out to
1347 * the loader to get buffers so we have a drawable size for the initial
1348 * viewport. */
1349 if (!brw->ctx.ViewportInitialized)
1350 intel_prepare_render(brw);
1351
1352 _mesa_make_current(ctx, fb, readFb);
1353 } else {
1354 GET_CURRENT_CONTEXT(ctx);
1355 _mesa_glthread_finish(ctx);
1356 _mesa_make_current(NULL, NULL, NULL);
1357 }
1358
1359 return true;
1360 }
1361
1362 void
1363 intel_resolve_for_dri2_flush(struct brw_context *brw,
1364 __DRIdrawable *drawable)
1365 {
1366 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1367
1368 if (devinfo->gen < 6) {
1369 /* MSAA and fast color clear are not supported, so don't waste time
1370 * checking whether a resolve is needed.
1371 */
1372 return;
1373 }
1374
1375 struct gl_framebuffer *fb = drawable->driverPrivate;
1376 struct intel_renderbuffer *rb;
1377
1378 /* Usually, only the back buffer will need to be downsampled. However,
1379 * the front buffer will also need it if the user has rendered into it.
1380 */
1381 static const gl_buffer_index buffers[2] = {
1382 BUFFER_BACK_LEFT,
1383 BUFFER_FRONT_LEFT,
1384 };
1385
1386 for (int i = 0; i < 2; ++i) {
1387 rb = intel_get_renderbuffer(fb, buffers[i]);
1388 if (rb == NULL || rb->mt == NULL)
1389 continue;
1390 if (rb->mt->surf.samples == 1) {
1391 assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1392 rb->layer_count == 1);
1393 intel_miptree_prepare_external(brw, rb->mt);
1394 } else {
1395 intel_renderbuffer_downsample(brw, rb);
1396
1397 /* Call prepare_external on the single-sample miptree to do any
1398 * needed resolves prior to handing it off to the window system.
1399 * This is needed in the case that rb->singlesample_mt is Y-tiled
1400 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
1401 * this case, the MSAA resolve above will write compressed data into
1402 * rb->singlesample_mt.
1403 *
1404 * TODO: Some day, if we decide to care about the tiny performance
1405 * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1406 * we could detect this case and just allocate the single-sampled
1407 * miptree without aux. However, that would be a lot of plumbing and
1408 * this is a rather exotic case so it's not really worth it.
1409 */
1410 intel_miptree_prepare_external(brw, rb->singlesample_mt);
1411 }
1412 }
1413 }
1414
1415 static unsigned
1416 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1417 {
1418 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1419 }
1420
1421 static void
1422 intel_query_dri2_buffers(struct brw_context *brw,
1423 __DRIdrawable *drawable,
1424 __DRIbuffer **buffers,
1425 int *count);
1426
1427 static void
1428 intel_process_dri2_buffer(struct brw_context *brw,
1429 __DRIdrawable *drawable,
1430 __DRIbuffer *buffer,
1431 struct intel_renderbuffer *rb,
1432 const char *buffer_name);
1433
1434 static void
1435 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1436
1437 static void
1438 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1439 {
1440 struct gl_framebuffer *fb = drawable->driverPrivate;
1441 struct intel_renderbuffer *rb;
1442 __DRIbuffer *buffers = NULL;
1443 int count;
1444 const char *region_name;
1445
1446 /* Set this up front, so that in case our buffers get invalidated
1447 * while we're getting new buffers, we don't clobber the stamp and
1448 * thus ignore the invalidate. */
1449 drawable->lastStamp = drawable->dri2.stamp;
1450
1451 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1452 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1453
1454 intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1455
1456 if (buffers == NULL)
1457 return;
1458
1459 for (int i = 0; i < count; i++) {
1460 switch (buffers[i].attachment) {
1461 case __DRI_BUFFER_FRONT_LEFT:
1462 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1463 region_name = "dri2 front buffer";
1464 break;
1465
1466 case __DRI_BUFFER_FAKE_FRONT_LEFT:
1467 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1468 region_name = "dri2 fake front buffer";
1469 break;
1470
1471 case __DRI_BUFFER_BACK_LEFT:
1472 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1473 region_name = "dri2 back buffer";
1474 break;
1475
1476 case __DRI_BUFFER_DEPTH:
1477 case __DRI_BUFFER_HIZ:
1478 case __DRI_BUFFER_DEPTH_STENCIL:
1479 case __DRI_BUFFER_STENCIL:
1480 case __DRI_BUFFER_ACCUM:
1481 default:
1482 fprintf(stderr,
1483 "unhandled buffer attach event, attachment type %d\n",
1484 buffers[i].attachment);
1485 return;
1486 }
1487
1488 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1489 }
1490
1491 }
1492
1493 void
1494 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1495 {
1496 struct brw_context *brw = context->driverPrivate;
1497 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1498
1499 /* Set this up front, so that in case our buffers get invalidated
1500 * while we're getting new buffers, we don't clobber the stamp and
1501 * thus ignore the invalidate. */
1502 drawable->lastStamp = drawable->dri2.stamp;
1503
1504 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1505 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1506
1507 if (dri_screen->image.loader)
1508 intel_update_image_buffers(brw, drawable);
1509 else
1510 intel_update_dri2_buffers(brw, drawable);
1511
1512 driUpdateFramebufferSize(&brw->ctx, drawable);
1513 }
1514
1515 /**
1516  * intel_prepare_render should be called anywhere that current read/drawbuffer
1517 * state is required.
1518 */
1519 void
1520 intel_prepare_render(struct brw_context *brw)
1521 {
1522 struct gl_context *ctx = &brw->ctx;
1523 __DRIcontext *driContext = brw->driContext;
1524 __DRIdrawable *drawable;
1525
1526 drawable = driContext->driDrawablePriv;
1527 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1528 if (drawable->lastStamp != drawable->dri2.stamp)
1529 intel_update_renderbuffers(driContext, drawable);
1530 driContext->dri2.draw_stamp = drawable->dri2.stamp;
1531 }
1532
1533 drawable = driContext->driReadablePriv;
1534 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1535 if (drawable->lastStamp != drawable->dri2.stamp)
1536 intel_update_renderbuffers(driContext, drawable);
1537 driContext->dri2.read_stamp = drawable->dri2.stamp;
1538 }
1539
1540 /* If we're currently rendering to the front buffer, the rendering
1541 * that will happen next will probably dirty the front buffer. So
1542 * mark it as dirty here.
1543 */
1544 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1545 brw->front_buffer_dirty = true;
1546
1547 if (brw->is_shared_buffer_bound) {
1548 /* Subsequent rendering will probably dirty the shared buffer. */
1549 brw->is_shared_buffer_dirty = true;
1550 }
1551 }
1552
/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable Drawable whose buffers are queried.
 * \param buffers [out] List of buffers returned by DRI2 query.
 * \param buffer_count [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer. So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer. If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

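   /* attachments[] holds (attachment, bits-per-pixel) pairs, so i / 2 is the
    * number of attachments actually being requested.
    */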
   *buffers =
      dri_screen->dri2.loader->getBuffersWithFormat(drawable,
                                                    &drawable->w,
                                                    &drawable->h,
                                                    attachments, i / 2,
                                                    buffer_count,
                                                    drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human-readable name, such as "dri2 front buffer",
 *    that is passed to brw_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct brw_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer(). If a bo already has a
       * name, then brw_bo_flink() is a low-cost getter. It does not
       * create a new name.
       */
      brw_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                    buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

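   /* Query the BO's tiling so that the miptree wrapping it is created with a
    * matching ISL tiling.
    */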
   uint32_t tiling, swizzle;
   brw_bo_get_tiling(bo, &tiling, &swizzle);

   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_bo(brw,
                                  bo,
                                  intel_rb_format(rb),
                                  0,
                                  drawable->w,
                                  drawable->h,
                                  1,
                                  buffer->pitch,
                                  isl_tiling_from_i915_tiling(tiling),
                                  MIPTREE_CREATE_DEFAULT);
   if (!mt) {
      brw_bo_unreference(bo);
      return;
   }

   /* We got this BO from X11. We can't assume that we have coherent texture
    * access because X may suddenly decide to use it for scan-out, which would
    * destroy coherency.
    */
   bo->cache_coherent = false;

   if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
                                                 drawable->w, drawable->h,
                                                 buffer->pitch)) {
      brw_bo_unreference(bo);
      intel_miptree_release(&mt);
      return;
   }

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   brw_bo_unreference(bo);
}

/**
 * \brief Attach a DRI image to a renderbuffer.
 *
 * Wraps the __DRIimage's BO in a miptree and installs it as the
 * renderbuffer's winsys buffer. Buffers of type __DRI_IMAGE_BUFFER_SHARED
 * are made shareable, which permanently disables auxiliary compression for
 * them.
 *
 * This is called from intel_update_image_buffers().
 *
 * \see intel_update_image_buffers()
 */
static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check whether we're already bound to the right buffer object. */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo) {
      if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
         intel_miptree_make_shareable(intel, last_mt);
      }
      return;
   }

   /* Only allow internal compression if samples == 0. For multisampled
    * window system buffers, the only thing the single-sampled buffer is used
    * for is as a resolve target. If we do any compression beyond what is
    * supported by the window system, we will just have to resolve anyway, so
    * it's probably better not to bother.
    */
   const bool allow_internal_aux = (num_samples == 0);

   struct intel_mipmap_tree *mt =
      intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
                                         intel_rb_format(rb),
                                         allow_internal_aux);
   if (!mt)
      return;

   if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
                                                 buffer->width, buffer->height,
                                                 buffer->pitch)) {
      intel_miptree_release(&mt);
      return;
   }

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }

   if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
      /* The compositor and the application may access this image
       * concurrently. The display hardware may even scan out the image while
       * the GPU is rendering to it. Aux surfaces cause difficulty with
       * concurrent access, so permanently disable aux for this miptree.
       *
       * Perhaps we could improve overall application performance by
       * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
       * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
       * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
       * approach to be highly dependent on the application's GL usage.
       *
       * I [chadv] expect clever disabling/re-enabling to be counterproductive
       * in the use cases I care about: applications that render near-realtime
       * handwriting to the surface while possibly undergoing simultaneous
       * scanout as a display plane. The app requires low render latency. Even
       * though the app spends most of its time in shared-buffer mode, it also
       * frequently transitions between shared-buffer (EGL_SINGLE_BUFFER) and
       * double-buffer (EGL_BACK_BUFFER) mode. Visual stutter during the
       * transitions should be avoided.
       *
       * In this case, I [chadv] believe reducing the GPU workload at
       * shared-buffer/double-buffer transitions would offer a smoother app
       * experience than any savings due to aux compression. But I've
       * collected no data to prove my theory.
       */
      intel_miptree_make_shareable(intel, mt);
   }
}

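/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which buffers to request, examine the renderbuffers attached
 * to the drawable's framebuffer, then request the buffers from the image
 * loader. Each returned image is handed to intel_update_image_buffer(), and
 * brw->is_shared_buffer_bound is updated to reflect whether the loader
 * returned a shared buffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \see intel_update_renderbuffers()
 * \see intel_update_image_buffer()
 */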
static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   mesa_format format;
   uint32_t buffer_mask = 0;
   int ret;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   ret = dri_screen->image.loader->getBuffers(drawable,
                                              driGLFormatToImageFormat(format),
                                              &drawable->dri2.stamp,
                                              drawable->loaderPrivate,
                                              buffer_mask,
                                              &images);
   if (!ret)
      return;

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }

   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }

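   /* A shared buffer, when returned, is the only buffer in the list. */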
   if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
      assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_SHARED);
      brw->is_shared_buffer_bound = true;
   } else {
      brw->is_shared_buffer_bound = false;
      brw->is_shared_buffer_dirty = false;
   }
}