[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "main/imports.h"
41 #include "main/macros.h"
42 #include "main/points.h"
43 #include "main/version.h"
44 #include "main/vtxfmt.h"
45 #include "main/texobj.h"
46 #include "main/framebuffer.h"
47 #include "main/stencil.h"
48 #include "main/state.h"
49 #include "main/spirv_extensions.h"
50
51 #include "vbo/vbo.h"
52
53 #include "drivers/common/driverfuncs.h"
54 #include "drivers/common/meta.h"
55 #include "utils.h"
56
57 #include "brw_context.h"
58 #include "brw_defines.h"
59 #include "brw_blorp.h"
60 #include "brw_draw.h"
61 #include "brw_state.h"
62
63 #include "intel_batchbuffer.h"
64 #include "intel_buffer_objects.h"
65 #include "intel_buffers.h"
66 #include "intel_fbo.h"
67 #include "intel_mipmap_tree.h"
68 #include "intel_pixel.h"
69 #include "intel_image.h"
70 #include "intel_tex.h"
71 #include "intel_tex_obj.h"
72
73 #include "swrast_setup/swrast_setup.h"
74 #include "tnl/tnl.h"
75 #include "tnl/t_pipeline.h"
76 #include "util/ralloc.h"
77 #include "util/debug.h"
78 #include "util/disk_cache.h"
79 #include "isl/isl.h"
80
81 #include "common/gen_defines.h"
82
83 #include "compiler/spirv/nir_spirv.h"
84 /***************************************
85 * Mesa's Driver Functions
86 ***************************************/
87
88 const char *const brw_vendor_string = "Intel Open Source Technology Center";
89
90 static const char *
91 get_bsw_model(const struct intel_screen *screen)
92 {
93 switch (screen->eu_total) {
94 case 16:
95 return "405";
96 case 12:
97 return "400";
98 default:
99 return "   "; /* three spaces: the caller memcpy()s 3 bytes over an "XXX" placeholder */
100 }
101 }
102
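/* Builds the GL_RENDERER string. For Braswell (deviceID 0x22B1) the device
 * name returned by gen_get_device_name() contains a literal "XXX"
 * placeholder, which gets patched below with the marketing model number
 * derived from the EU count (16 EUs -> "405"), so the final string looks
 * roughly like "Mesa DRI Intel(R) HD Graphics 405 (Braswell)"
 * (illustrative example, not the exact name table entry).
 */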
103 const char *
104 brw_get_renderer_string(const struct intel_screen *screen)
105 {
106 static char buf[128];
107 const char *name = gen_get_device_name(screen->deviceID);
108
109 if (!name)
110 name = "Intel Unknown";
111
112 snprintf(buf, sizeof(buf), "Mesa DRI %s", name);
113
114 /* Braswell branding is funny, so we have to fix it up here */
115 if (screen->deviceID == 0x22B1) {
116 char *needle = strstr(buf, "XXX");
117 if (needle)
118 memcpy(needle, get_bsw_model(screen), 3);
119 }
120
121 return buf;
122 }
123
124 static const GLubyte *
125 intel_get_string(struct gl_context * ctx, GLenum name)
126 {
127 const struct brw_context *const brw = brw_context(ctx);
128
129 switch (name) {
130 case GL_VENDOR:
131 return (GLubyte *) brw_vendor_string;
132
133 case GL_RENDERER:
134 return
135 (GLubyte *) brw_get_renderer_string(brw->screen);
136
137 default:
138 return NULL;
139 }
140 }
141
142 static void
143 brw_set_background_context(struct gl_context *ctx,
144 struct util_queue_monitoring *queue_info)
145 {
146 struct brw_context *brw = brw_context(ctx);
147 __DRIcontext *driContext = brw->driContext;
148 __DRIscreen *driScreen = driContext->driScreenPriv;
149 const __DRIbackgroundCallableExtension *backgroundCallable =
150 driScreen->dri2.backgroundCallable;
151
152 /* Note: Mesa will only call this function if we've called
153 * _mesa_enable_multithreading(). We only do that if the loader exposed
154 * the __DRI_BACKGROUND_CALLABLE extension. So we know that
155 * backgroundCallable is not NULL.
156 */
157 backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
158 }
159
160 static void
161 intel_viewport(struct gl_context *ctx)
162 {
163 struct brw_context *brw = brw_context(ctx);
164 __DRIcontext *driContext = brw->driContext;
165
166 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
167 if (driContext->driDrawablePriv)
168 dri2InvalidateDrawable(driContext->driDrawablePriv);
169 if (driContext->driReadablePriv)
170 dri2InvalidateDrawable(driContext->driReadablePriv);
171 }
172 }
173
174 static void
175 intel_update_framebuffer(struct gl_context *ctx,
176 struct gl_framebuffer *fb)
177 {
178 struct brw_context *brw = brw_context(ctx);
179
180 /* Quantize the derived default number of samples
181 */
182 fb->DefaultGeometry._NumSamples =
183 intel_quantize_num_samples(brw->screen,
184 fb->DefaultGeometry.NumSamples);
185 }
186
187 static void
188 intel_update_state(struct gl_context * ctx)
189 {
190 GLuint new_state = ctx->NewState;
191 struct brw_context *brw = brw_context(ctx);
192
193 if (ctx->swrast_context)
194 _swrast_InvalidateState(ctx, new_state);
195
196 brw->NewGLState |= new_state;
197
198 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
199 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
200
201 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
202 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
203 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
204 brw->stencil_write_enabled =
205 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
206 }
207
208 if (new_state & _NEW_POLYGON)
209 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
210
211 if (new_state & _NEW_BUFFERS) {
212 intel_update_framebuffer(ctx, ctx->DrawBuffer);
213 if (ctx->DrawBuffer != ctx->ReadBuffer)
214 intel_update_framebuffer(ctx, ctx->ReadBuffer);
215 }
216 }
217
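/* flushFront() picks whichever front-buffer flush hook the loader provides:
 * the image loader's flushFrontBuffer if present, otherwise the legacy DRI2
 * loader's. Callers below check that the hook is non-NULL before calling it.
 */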
218 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
219
220 static void
221 intel_flush_front(struct gl_context *ctx)
222 {
223 struct brw_context *brw = brw_context(ctx);
224 __DRIcontext *driContext = brw->driContext;
225 __DRIdrawable *driDrawable = driContext->driDrawablePriv;
226 __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
227
228 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
229 if (flushFront(dri_screen) && driDrawable &&
230 driDrawable->loaderPrivate) {
231
232 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
233 *
234 * This potentially resolves both front and back buffer. It
235 * is unnecessary to resolve the back, but harms nothing except
236 * performance. And no one cares about front-buffer render
237 * performance.
238 */
239 intel_resolve_for_dri2_flush(brw, driDrawable);
240 intel_batchbuffer_flush(brw);
241
242 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
243
244 /* We set the dirty bit in intel_prepare_render() if we're
245 * front buffer rendering once we get there.
246 */
247 brw->front_buffer_dirty = false;
248 }
249 }
250 }
251
252 static void
253 brw_display_shared_buffer(struct brw_context *brw)
254 {
255 __DRIcontext *dri_context = brw->driContext;
256 __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
257 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
258 int fence_fd = -1;
259
260 if (!brw->is_shared_buffer_bound)
261 return;
262
263 if (!brw->is_shared_buffer_dirty)
264 return;
265
266 if (brw->screen->has_exec_fence) {
267 /* This function is always called during a flush operation, so there is
268 * no need to flush again here. But we want to provide a fence_fd to the
269 * loader, and a redundant flush is the easiest way to acquire one.
270 */
271 if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
272 return;
273 }
274
275 dri_screen->mutableRenderBuffer.loader
276 ->displaySharedBuffer(dri_drawable, fence_fd,
277 dri_drawable->loaderPrivate);
278 brw->is_shared_buffer_dirty = false;
279 }
280
281 static void
282 intel_glFlush(struct gl_context *ctx)
283 {
284 struct brw_context *brw = brw_context(ctx);
285
286 intel_batchbuffer_flush(brw);
287 intel_flush_front(ctx);
288 brw_display_shared_buffer(brw);
289 brw->need_flush_throttle = true;
290 }
291
292 static void
293 intel_finish(struct gl_context * ctx)
294 {
295 struct brw_context *brw = brw_context(ctx);
296
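/* glFinish(): flush everything, then block until the BO of the last
 * submitted batch is idle, so all prior rendering has completed on return.
 */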
297 intel_glFlush(ctx);
298
299 if (brw->batch.last_bo)
300 brw_bo_wait_rendering(brw->batch.last_bo);
301 }
302
303 static void
304 brw_init_driver_functions(struct brw_context *brw,
305 struct dd_function_table *functions)
306 {
307 const struct gen_device_info *devinfo = &brw->screen->devinfo;
308
309 _mesa_init_driver_functions(functions);
310
311 /* GLX uses DRI2 invalidate events to handle window resizing.
312 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
313 * which doesn't provide a mechanism for snooping the event queues.
314 *
315 * So EGL still relies on viewport hacks to handle window resizing.
316 * This should go away with DRI3000.
317 */
318 if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
319 functions->Viewport = intel_viewport;
320
321 functions->Flush = intel_glFlush;
322 functions->Finish = intel_finish;
323 functions->GetString = intel_get_string;
324 functions->UpdateState = intel_update_state;
325
326 brw_init_draw_functions(functions);
327 intelInitTextureFuncs(functions);
328 intelInitTextureImageFuncs(functions);
329 intelInitTextureCopyImageFuncs(functions);
330 intelInitCopyImageFuncs(functions);
331 intelInitClearFuncs(functions);
332 intelInitBufferFuncs(functions);
333 intelInitPixelFuncs(functions);
334 intelInitBufferObjectFuncs(functions);
335 brw_init_syncobj_functions(functions);
336 brw_init_object_purgeable_functions(functions);
337
338 brwInitFragProgFuncs( functions );
339 brw_init_common_queryobj_functions(functions);
340 if (devinfo->gen >= 8 || devinfo->is_haswell)
341 hsw_init_queryobj_functions(functions);
342 else if (devinfo->gen >= 6)
343 gen6_init_queryobj_functions(functions);
344 else
345 gen4_init_queryobj_functions(functions);
346 brw_init_compute_functions(functions);
347 brw_init_conditional_render_functions(functions);
348
349 functions->GenerateMipmap = brw_generate_mipmap;
350
351 functions->QueryInternalFormat = brw_query_internal_format;
352
353 functions->NewTransformFeedback = brw_new_transform_feedback;
354 functions->DeleteTransformFeedback = brw_delete_transform_feedback;
355 if (can_do_mi_math_and_lrr(brw->screen)) {
356 functions->BeginTransformFeedback = hsw_begin_transform_feedback;
357 functions->EndTransformFeedback = hsw_end_transform_feedback;
358 functions->PauseTransformFeedback = hsw_pause_transform_feedback;
359 functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
360 } else if (devinfo->gen >= 7) {
361 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
362 functions->EndTransformFeedback = gen7_end_transform_feedback;
363 functions->PauseTransformFeedback = gen7_pause_transform_feedback;
364 functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
365 functions->GetTransformFeedbackVertexCount =
366 brw_get_transform_feedback_vertex_count;
367 } else {
368 functions->BeginTransformFeedback = brw_begin_transform_feedback;
369 functions->EndTransformFeedback = brw_end_transform_feedback;
370 functions->PauseTransformFeedback = brw_pause_transform_feedback;
371 functions->ResumeTransformFeedback = brw_resume_transform_feedback;
372 functions->GetTransformFeedbackVertexCount =
373 brw_get_transform_feedback_vertex_count;
374 }
375
376 if (devinfo->gen >= 6)
377 functions->GetSamplePosition = gen6_get_sample_position;
378
379 /* GL_ARB_get_program_binary */
380 brw_program_binary_init(brw->screen->deviceID);
381 functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
382 functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
383 functions->ProgramBinaryDeserializeDriverBlob =
384 brw_deserialize_program_binary;
385
386 if (brw->screen->disk_cache) {
387 functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
388 }
389
390 functions->SetBackgroundContext = brw_set_background_context;
391 }
392
393 static void
394 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
395 {
396 const struct gen_device_info *devinfo = &brw->screen->devinfo;
397 struct gl_context *ctx = &brw->ctx;
398
399 /* The following SPIR-V capabilities are only supported on gen7+. In theory
400 * you should enable the extension only on gen7+, but just in case let's
401 * assert it.
402 */
403 assert(devinfo->gen >= 7);
404
405 ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
406 ctx->Const.SpirVCapabilities.draw_parameters = true;
407 ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
408 ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
409 ctx->Const.SpirVCapabilities.image_write_without_format = true;
410 ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
411 ctx->Const.SpirVCapabilities.tessellation = true;
412 ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
413 ctx->Const.SpirVCapabilities.variable_pointers = true;
414 ctx->Const.SpirVCapabilities.integer_functions2 = devinfo->gen >= 8;
415 }
416
417 static void
418 brw_initialize_context_constants(struct brw_context *brw)
419 {
420 const struct gen_device_info *devinfo = &brw->screen->devinfo;
421 struct gl_context *ctx = &brw->ctx;
422 const struct brw_compiler *compiler = brw->screen->compiler;
423
424 const bool stage_exists[MESA_SHADER_STAGES] = {
425 [MESA_SHADER_VERTEX] = true,
426 [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
427 [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
428 [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
429 [MESA_SHADER_FRAGMENT] = true,
430 [MESA_SHADER_COMPUTE] =
431 (_mesa_is_desktop_gl(ctx) &&
432 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
433 (ctx->API == API_OPENGLES2 &&
434 ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
435 };
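/* Compute is only exposed if brw_initialize_cs_context_constants() (called
 * before this function from brwCreateContext()) produced enough invocations:
 * ARB_compute_shader requires 1024, while OpenGL ES 3.1 only requires 128.
 */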
436
437 unsigned num_stages = 0;
438 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
439 if (stage_exists[i])
440 num_stages++;
441 }
442
443 unsigned max_samplers =
444 devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;
445
446 ctx->Const.MaxDualSourceDrawBuffers = 1;
447 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
448 ctx->Const.MaxCombinedShaderOutputResources =
449 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
450
451 /* The timestamp register we can read for glGetTimestamp() is
452 * sometimes only 32 bits, before scaling to nanoseconds (depending
453 * on kernel).
454 *
455 * Once scaled to nanoseconds the timestamp would roll over at a
456 * non-power-of-two, so an application couldn't use
457 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
458 * report 36 bits and truncate at that (rolling over 5 times as
459 * often as the HW counter), and when the 32-bit counter rolls
460 * over, it happens to also be at a rollover in the reported value
461 * from near (1<<36) to 0.
462 *
463 * The low 32 bits roll over in ~343 seconds. Our 36-bit result
464 * rolls over every ~69 seconds.
465 */
466 ctx->Const.QueryCounterBits.Timestamp = 36;
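/* Back-of-the-envelope check of the numbers above, assuming the common
 * 80 ns (12.5 MHz) timestamp tick: 2^32 ticks * 80 ns is roughly 343 s for
 * the raw counter, and 2^36 ns is roughly 68.7 s for the reported value.
 */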
467
468 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
469 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
470 if (devinfo->gen >= 7) {
471 ctx->Const.MaxRenderbufferSize = 16384;
472 ctx->Const.MaxTextureSize = 16384;
473 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
474 } else {
475 ctx->Const.MaxRenderbufferSize = 8192;
476 ctx->Const.MaxTextureSize = 8192;
477 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
478 }
479 ctx->Const.Max3DTextureLevels = 12; /* 2048 */
480 ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
481 ctx->Const.MaxTextureMbytes = 1536;
482 ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
483 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
484 ctx->Const.MaxTextureLodBias = 15.0;
485 ctx->Const.StripTextureBorder = true;
486 if (devinfo->gen >= 7) {
487 ctx->Const.MaxProgramTextureGatherComponents = 4;
488 ctx->Const.MinProgramTextureGatherOffset = -32;
489 ctx->Const.MaxProgramTextureGatherOffset = 31;
490 } else if (devinfo->gen == 6) {
491 ctx->Const.MaxProgramTextureGatherComponents = 1;
492 ctx->Const.MinProgramTextureGatherOffset = -8;
493 ctx->Const.MaxProgramTextureGatherOffset = 7;
494 }
495
496 ctx->Const.MaxUniformBlockSize = 65536;
497
498 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
499 struct gl_program_constants *prog = &ctx->Const.Program[i];
500
501 if (!stage_exists[i])
502 continue;
503
504 prog->MaxTextureImageUnits = max_samplers;
505
506 prog->MaxUniformBlocks = BRW_MAX_UBO;
507 prog->MaxCombinedUniformComponents =
508 prog->MaxUniformComponents +
509 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
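/* MaxUniformBlockSize is in bytes, so each UBO contributes
 * 65536 / 4 = 16384 float-sized components to the combined total here.
 */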
510
511 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
512 prog->MaxAtomicBuffers = BRW_MAX_ABO;
513 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
514 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
515 }
516
517 ctx->Const.MaxTextureUnits =
518 MIN2(ctx->Const.MaxTextureCoordUnits,
519 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
520
521 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
522 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
523 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
524 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
525 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
526 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
527 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
528
529
530 /* Hardware only supports a limited number of transform feedback buffers.
531 * So we need to override the Mesa default (which is based only on software
532 * limits).
533 */
534 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
535
536 /* On Gen6, in the worst case, we use up one binding table entry per
537 * transform feedback component (see comments above the definition of
538 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
539 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
540 * BRW_MAX_SOL_BINDINGS.
541 *
542 * In "separate components" mode, we need to divide this value by
543 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
544 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
545 */
546 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
547 ctx->Const.MaxTransformFeedbackSeparateComponents =
548 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
549
550 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
551 !can_do_mi_math_and_lrr(brw->screen);
552
553 int max_samples;
554 const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
555 const int clamp_max_samples =
556 driQueryOptioni(&brw->optionCache, "clamp_max_samples");
557
558 if (clamp_max_samples < 0) {
559 max_samples = msaa_modes[0];
560 } else {
561 /* Select the largest supported MSAA mode that does not exceed
562 * clamp_max_samples.
563 */
564 max_samples = 0;
565 for (int i = 0; msaa_modes[i] != 0; ++i) {
566 if (msaa_modes[i] <= clamp_max_samples) {
567 max_samples = msaa_modes[i];
568 break;
569 }
570 }
571 }
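/* This relies on intel_supported_msaa_modes() returning the modes in
 * decreasing order (terminated by 0), so msaa_modes[0] is the maximum and
 * the first mode <= clamp_max_samples is the largest acceptable one.
 */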
572
573 ctx->Const.MaxSamples = max_samples;
574 ctx->Const.MaxColorTextureSamples = max_samples;
575 ctx->Const.MaxDepthTextureSamples = max_samples;
576 ctx->Const.MaxIntegerSamples = max_samples;
577 ctx->Const.MaxImageSamples = 0;
578
579 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
580 * to map indices of rectangular grid to sample numbers within a pixel.
581 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
582 * extension implementation. For more details see the comment above
583 * gen6_set_sample_maps() definition.
584 */
585 gen6_set_sample_maps(ctx);
586
587 ctx->Const.MinLineWidth = 1.0;
588 ctx->Const.MinLineWidthAA = 1.0;
589 if (devinfo->gen >= 6) {
590 ctx->Const.MaxLineWidth = 7.375;
591 ctx->Const.MaxLineWidthAA = 7.375;
592 ctx->Const.LineWidthGranularity = 0.125;
593 } else {
594 ctx->Const.MaxLineWidth = 7.0;
595 ctx->Const.MaxLineWidthAA = 7.0;
596 ctx->Const.LineWidthGranularity = 0.5;
597 }
598
599 /* For non-antialiased lines, we have to round the line width to the
600 * nearest whole number. Make sure that we don't advertise a line
601 * width that, when rounded, will be beyond the actual hardware
602 * maximum.
603 */
604 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
605
606 ctx->Const.MinPointSize = 1.0;
607 ctx->Const.MinPointSizeAA = 1.0;
608 ctx->Const.MaxPointSize = 255.0;
609 ctx->Const.MaxPointSizeAA = 255.0;
610 ctx->Const.PointSizeGranularity = 1.0;
611
612 if (devinfo->gen >= 5 || devinfo->is_g4x)
613 ctx->Const.MaxClipPlanes = 8;
614
615 ctx->Const.GLSLFragCoordIsSysVal = true;
616 ctx->Const.GLSLFrontFacingIsSysVal = true;
617 ctx->Const.GLSLTessLevelsAsInputs = true;
618 ctx->Const.PrimitiveRestartForPatches = true;
619
620 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
621 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
622 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
623 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
624 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
625 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
626 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
627 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
628 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
629 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
630 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
631 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
632 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
633 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
634
635 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
636 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
637 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
638 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
639 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
640 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
641 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
642 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
643 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
644 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
645 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
646
647 /* Fragment shaders use real, 32-bit twos-complement integers for all
648 * integer types.
649 */
650 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
651 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
652 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
653 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
654 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
655
656 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
657 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
658 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
659 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
660 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
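/* The RangeMin/RangeMax/Precision values above follow the
 * glGetShaderPrecisionFormat() convention: log2 of the representable
 * magnitudes (31 for |-2^31|, 30 for 2^31 - 1) and a precision of 0 for
 * exact integer types.
 */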
661
662 /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
663 * but we're not sure how it handles the vertex order, which affects the
664 * provoking vertex decision. Always use the last-vertex convention for
665 * quad primitives, which works as expected for now.
666 */
667 if (devinfo->gen >= 6)
668 ctx->Const.QuadsFollowProvokingVertexConvention = false;
669
670 ctx->Const.NativeIntegers = true;
671
672 /* Regarding the CMP instruction, the Ivybridge PRM says:
673 *
674 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
675 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
676 * 0xFFFFFFFF) is assigned to dst."
677 *
678 * but PRMs for earlier generations say
679 *
680 * "In dword format, one GRF may store up to 8 results. When the register
681 * is used later as a vector of Booleans, as only LSB at each channel
682 * contains meaning [sic] data, software should make sure all higher bits
683 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
684 *
685 * We select the representation of a true boolean uniform to be ~0, and fix
686 * the results of Gen <= 5 CMP instructions with -(result & 1).
687 */
688 ctx->Const.UniformBooleanTrue = ~0;
689
690 /* From the gen4 PRM, volume 4 page 127:
691 *
692 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
693 * the base address of the first element of the surface, computed in
694 * software by adding the surface base address to the byte offset of
695 * the element in the buffer."
696 *
697 * However, unaligned accesses are slower, so enforce buffer alignment.
698 *
699 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
700 * restriction: the start of the buffer needs to be 32B aligned.
701 */
702 ctx->Const.UniformBufferOffsetAlignment = 32;
703
704 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
705 * that we can safely have the CPU and GPU writing the same SSBO on
706 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
707 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
708 * be updating disjoint regions of the buffer simultaneously and that will
709 * break if the regions overlap the same cacheline.
710 */
711 ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
712 ctx->Const.TextureBufferOffsetAlignment = 16;
713 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
714
715 if (devinfo->gen >= 6) {
716 ctx->Const.MaxVarying = 32;
717 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
718 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
719 compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
720 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
721 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
722 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
723 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
724 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
725 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
726 }
727
728 /* We want the GLSL compiler to emit code that uses condition codes */
729 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
730 ctx->Const.ShaderCompilerOptions[i] =
731 brw->screen->compiler->glsl_compiler_options[i];
732 }
733
734 if (devinfo->gen >= 7) {
735 ctx->Const.MaxViewportWidth = 32768;
736 ctx->Const.MaxViewportHeight = 32768;
737 }
738
739 /* ARB_viewport_array, OES_viewport_array */
740 if (devinfo->gen >= 6) {
741 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
742 ctx->Const.ViewportSubpixelBits = 8;
743
744 /* Cast to float before negating because MaxViewportWidth is unsigned.
745 */
746 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
747 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
748 }
749
750 /* ARB_gpu_shader5 */
751 if (devinfo->gen >= 7)
752 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
753
754 /* ARB_framebuffer_no_attachments */
755 ctx->Const.MaxFramebufferWidth = 16384;
756 ctx->Const.MaxFramebufferHeight = 16384;
757 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
758 ctx->Const.MaxFramebufferSamples = max_samples;
759
760 /* OES_primitive_bounding_box */
761 ctx->Const.NoPrimitiveBoundingBoxOutput = true;
762
763 /* TODO: We should be able to use STD430 packing by default on all hardware
764 * but some piglit tests [1] currently fail on SNB when this is enabled.
765 * The problem is that the message we use for uniform pulls in the vec4
766 * back-end on SNB is the OWORD block load instruction, which takes its
767 * offset in units of OWORDs (16 bytes). On IVB+, we use the sampler,
768 * which doesn't have these restrictions.
769 *
770 * In the scalar back-end, we use the sampler for dynamic uniform loads and
771 * pull an entire cache line at a time for constant offset loads both of
772 * which support almost any alignment.
773 *
774 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
775 */
776 if (devinfo->gen >= 7)
777 ctx->Const.UseSTD430AsDefaultPacking = true;
778
779 if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
780 ctx->Const.AllowMappedBuffersDuringExecution = true;
781
782 /* GL_ARB_get_program_binary */
783 ctx->Const.NumProgramBinaryFormats = 1;
784 }
785
786 static void
787 brw_initialize_cs_context_constants(struct brw_context *brw)
788 {
789 struct gl_context *ctx = &brw->ctx;
790 const struct intel_screen *screen = brw->screen;
791 struct gen_device_info *devinfo = &brw->screen->devinfo;
792
793 /* FINISHME: Do this for all platforms that the kernel supports */
794 if (devinfo->is_cherryview &&
795 screen->subslice_total > 0 && screen->eu_total > 0) {
796 /* Logical CS threads = EUs per subslice * 7 threads per EU */
797 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
798
799 /* Fuse configurations may give more threads than expected, never less. */
800 if (max_cs_threads > devinfo->max_cs_threads)
801 devinfo->max_cs_threads = max_cs_threads;
802 }
803
804 /* Maximum number of scalar compute shader invocations that can be run in
805 * parallel in the same subslice assuming SIMD32 dispatch.
806 *
807 * We don't advertise more than 64 threads, because we are limited to 64 by
808 * our usage of thread_width_max in the gpgpu walker command. This only
809 * currently impacts Haswell, which otherwise might be able to advertise 70
810 * threads. With SIMD32 and 64 threads, Haswell still provides twice the
811 * number of invocations required by ARB_compute_shader.
812 */
813 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
814 const uint32_t max_invocations = 32 * max_threads;
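/* With the 64-thread cap this advertises 32 * 64 = 2048 invocations,
 * twice the 1024 that ARB_compute_shader requires.
 */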
815 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
816 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
817 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
818 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
819 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
820 }
821
822 /**
823 * Process driconf (drirc) options, setting appropriate context flags.
824 *
825 * intelInitExtensions still pokes at optionCache directly, in order to
826 * avoid advertising various extensions. No flags are set, so it makes
827 * sense to continue doing that there.
828 */
829 static void
830 brw_process_driconf_options(struct brw_context *brw)
831 {
832 const struct gen_device_info *devinfo = &brw->screen->devinfo;
833 struct gl_context *ctx = &brw->ctx;
834
835 driOptionCache *options = &brw->optionCache;
836 driParseConfigFiles(options, &brw->screen->optionCache,
837 brw->driContext->driScreenPriv->myNum,
838 "i965", NULL, NULL, 0);
839
840 if (INTEL_DEBUG & DEBUG_NO_HIZ) {
841 brw->has_hiz = false;
842 /* On gen6, you can only do separate stencil with HIZ. */
843 if (devinfo->gen == 6)
844 brw->has_separate_stencil = false;
845 }
846
847 if (driQueryOptionb(options, "mesa_no_error"))
848 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
849
850 if (driQueryOptionb(options, "always_flush_batch")) {
851 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
852 brw->always_flush_batch = true;
853 }
854
855 if (driQueryOptionb(options, "always_flush_cache")) {
856 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
857 brw->always_flush_cache = true;
858 }
859
860 if (driQueryOptionb(options, "disable_throttling")) {
861 fprintf(stderr, "disabling flush throttling\n");
862 brw->disable_throttling = true;
863 }
864
865 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
866
867 if (driQueryOptionb(&brw->optionCache, "precise_trig"))
868 brw->screen->compiler->precise_trig = true;
869
870 ctx->Const.ForceGLSLExtensionsWarn =
871 driQueryOptionb(options, "force_glsl_extensions_warn");
872
873 ctx->Const.ForceGLSLVersion =
874 driQueryOptioni(options, "force_glsl_version");
875
876 ctx->Const.DisableGLSLLineContinuations =
877 driQueryOptionb(options, "disable_glsl_line_continuations");
878
879 ctx->Const.AllowGLSLExtensionDirectiveMidShader =
880 driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
881
882 ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
883 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
884
885 ctx->Const.AllowHigherCompatVersion =
886 driQueryOptionb(options, "allow_higher_compat_version");
887
888 ctx->Const.ForceGLSLAbsSqrt =
889 driQueryOptionb(options, "force_glsl_abs_sqrt");
890
891 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
892
893 brw->dual_color_blend_by_location =
894 driQueryOptionb(options, "dual_color_blend_by_location");
895
896 ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
897 driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
898
899 ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
900 driComputeOptionsSha1(&brw->screen->optionCache,
901 ctx->Const.dri_config_options_sha1);
902 }
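/* A minimal drirc snippet (illustrative; see the installed drirc for the
 * exact schema) that would make the always_flush_batch query above return
 * true for every application:
 *
 *   <driconf>
 *     <device driver="i965">
 *       <application name="Default">
 *         <option name="always_flush_batch" value="true"/>
 *       </application>
 *     </device>
 *   </driconf>
 */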
903
904 GLboolean
905 brwCreateContext(gl_api api,
906 const struct gl_config *mesaVis,
907 __DRIcontext *driContextPriv,
908 const struct __DriverContextConfig *ctx_config,
909 unsigned *dri_ctx_error,
910 void *sharedContextPrivate)
911 {
912 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
913 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
914 const struct gen_device_info *devinfo = &screen->devinfo;
915 struct dd_function_table functions;
916
917 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
918 * provides us with context reset notifications.
919 */
920 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
921 __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
922 __DRI_CTX_FLAG_NO_ERROR;
923
924 if (screen->has_context_reset_notification)
925 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
926
927 if (ctx_config->flags & ~allowed_flags) {
928 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
929 return false;
930 }
931
932 if (ctx_config->attribute_mask &
933 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
934 __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
935 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
936 return false;
937 }
938
939 bool notify_reset =
940 ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
941 ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
942
943 struct brw_context *brw = rzalloc(NULL, struct brw_context);
944 if (!brw) {
945 fprintf(stderr, "%s: failed to alloc context\n", __func__);
946 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
947 return false;
948 }
949 brw->perf_ctx = gen_perf_new_context(brw);
950
951 driContextPriv->driverPrivate = brw;
952 brw->driContext = driContextPriv;
953 brw->screen = screen;
954 brw->bufmgr = screen->bufmgr;
955
956 brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
957 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
958
959 brw->has_swizzling = screen->hw_has_swizzling;
960
961 brw->isl_dev = screen->isl_dev;
962
963 brw->vs.base.stage = MESA_SHADER_VERTEX;
964 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
965 brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
966 brw->gs.base.stage = MESA_SHADER_GEOMETRY;
967 brw->wm.base.stage = MESA_SHADER_FRAGMENT;
968 brw->cs.base.stage = MESA_SHADER_COMPUTE;
969
970 brw_init_driver_functions(brw, &functions);
971
972 if (notify_reset)
973 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
974
975 brw_process_driconf_options(brw);
976
977 if (api == API_OPENGL_CORE &&
978 driQueryOptionb(&screen->optionCache, "force_compat_profile")) {
979 api = API_OPENGL_COMPAT;
980 }
981
982 struct gl_context *ctx = &brw->ctx;
983
984 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
985 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
986 fprintf(stderr, "%s: failed to init mesa context\n", __func__);
987 intelDestroyContext(driContextPriv);
988 return false;
989 }
990
991 driContextSetFlags(ctx, ctx_config->flags);
992
993 /* Initialize the software rasterizer and helper modules.
994 *
995 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
996 * software fallbacks (which we have to support on legacy GL to do weird
997 * glDrawPixels(), glBitmap(), and other functions).
998 */
999 if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1000 _swrast_CreateContext(ctx);
1001 }
1002
1003 _vbo_CreateContext(ctx);
1004 if (ctx->swrast_context) {
1005 _tnl_CreateContext(ctx);
1006 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1007 _swsetup_CreateContext(ctx);
1008
1009 /* Configure swrast to match hardware characteristics: */
1010 _swrast_allow_pixel_fog(ctx, false);
1011 _swrast_allow_vertex_fog(ctx, true);
1012 }
1013
1014 _mesa_meta_init(ctx);
1015
1016 if (INTEL_DEBUG & DEBUG_PERF)
1017 brw->perf_debug = true;
1018
1019 brw_initialize_cs_context_constants(brw);
1020 brw_initialize_context_constants(brw);
1021
1022 ctx->Const.ResetStrategy = notify_reset
1023 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1024
1025 /* Reinitialize the context point state. It depends on ctx->Const values. */
1026 _mesa_init_point(ctx);
1027
1028 intel_fbo_init(brw);
1029
1030 intel_batchbuffer_init(brw);
1031
1032 /* Create a new hardware context. Using a hardware context means that
1033 * our GPU state will be saved/restored on context switch, allowing us
1034 * to assume that the GPU is in the same state we left it in.
1035 *
1036 * This is required for transform feedback buffer offsets, query objects,
1037 * and also allows us to reduce how much state we have to emit.
1038 */
1039 brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1040 if (!brw->hw_ctx && devinfo->gen >= 6) {
1041 fprintf(stderr, "Failed to create hardware context.\n");
1042 intelDestroyContext(driContextPriv);
1043 return false;
1044 }
1045
1046 if (brw->hw_ctx) {
1047 int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1048 if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1049 switch (ctx_config->priority) {
1050 case __DRI_CTX_PRIORITY_LOW:
1051 hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1052 break;
1053 case __DRI_CTX_PRIORITY_HIGH:
1054 hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1055 break;
1056 }
1057 }
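/* GEN_CONTEXT_MEDIUM_PRIORITY is expected to equal the kernel's
 * I915_CONTEXT_DEFAULT_PRIORITY (see gen_defines.h), so the priority ioctl
 * below is only issued when a non-default priority was actually requested.
 */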
1058 if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1059 brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1060 fprintf(stderr,
1061 "Failed to set priority [%d:%d] for hardware context.\n",
1062 ctx_config->priority, hw_priority);
1063 intelDestroyContext(driContextPriv);
1064 return false;
1065 }
1066 }
1067
1068 if (brw_init_pipe_control(brw, devinfo)) {
1069 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1070 intelDestroyContext(driContextPriv);
1071 return false;
1072 }
1073
1074 brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1075
1076 brw_init_state(brw);
1077
1078 intelInitExtensions(ctx);
1079
1080 brw_init_surface_formats(brw);
1081
1082 brw_blorp_init(brw);
1083
1084 brw->urb.size = devinfo->urb.size;
1085
1086 if (devinfo->gen == 6)
1087 brw->urb.gs_present = false;
1088
1089 brw->prim_restart.in_progress = false;
1090 brw->prim_restart.enable_cut_index = false;
1091 brw->gs.enabled = false;
1092 brw->clip.viewport_count = 1;
1093
1094 brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1095
1096 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1097
1098 ctx->VertexProgram._MaintainTnlProgram = true;
1099 ctx->FragmentProgram._MaintainTexEnvProgram = true;
1100
1101 brw_draw_init( brw );
1102
1103 if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1104 /* Turn on some extra GL_ARB_debug_output generation. */
1105 brw->perf_debug = true;
1106 }
1107
1108 if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1109 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1110 ctx->Const.RobustAccess = GL_TRUE;
1111 }
1112
1113 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1114 brw_init_shader_time(brw);
1115
1116 _mesa_override_extensions(ctx);
1117 _mesa_compute_version(ctx);
1118
1119 /* GL_ARB_gl_spirv */
1120 if (ctx->Extensions.ARB_gl_spirv) {
1121 brw_initialize_spirv_supported_capabilities(brw);
1122
1123 if (ctx->Extensions.ARB_spirv_extensions) {
1124 /* GL_ARB_spirv_extensions */
1125 ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
1126 _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
1127 &ctx->Const.SpirVCapabilities);
1128 }
1129 }
1130
1131 _mesa_initialize_dispatch_tables(ctx);
1132 _mesa_initialize_vbo_vtxfmt(ctx);
1133
1134 if (ctx->Extensions.INTEL_performance_query)
1135 brw_init_performance_queries(brw);
1136
1137 vbo_use_buffer_objects(ctx);
1138
1139 brw->ctx.Cache = brw->screen->disk_cache;
1140
1141 if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1142 driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1143 /* Loader supports multithreading, and so do we. */
1144 _mesa_glthread_init(ctx);
1145 }
1146
1147 return true;
1148 }
1149
1150 void
1151 intelDestroyContext(__DRIcontext * driContextPriv)
1152 {
1153 struct brw_context *brw =
1154 (struct brw_context *) driContextPriv->driverPrivate;
1155 struct gl_context *ctx = &brw->ctx;
1156
1157 GET_CURRENT_CONTEXT(curctx);
1158
1159 if (curctx == NULL) {
1160 /* No current context, but we need one to release the
1161 * renderbuffer surface when we release the framebuffer.
1162 * So temporarily bind the context.
1163 */
1164 _mesa_make_current(ctx, NULL, NULL);
1165 }
1166
1167 _mesa_glthread_destroy(&brw->ctx);
1168
1169 _mesa_meta_free(&brw->ctx);
1170
1171 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1172 /* Force a report. */
1173 brw->shader_time.report_time = 0;
1174
1175 brw_collect_and_report_shader_time(brw);
1176 brw_destroy_shader_time(brw);
1177 }
1178
1179 blorp_finish(&brw->blorp);
1180
1181 brw_destroy_state(brw);
1182 brw_draw_destroy(brw);
1183
1184 brw_bo_unreference(brw->curbe.curbe_bo);
1185
1186 brw_bo_unreference(brw->vs.base.scratch_bo);
1187 brw_bo_unreference(brw->tcs.base.scratch_bo);
1188 brw_bo_unreference(brw->tes.base.scratch_bo);
1189 brw_bo_unreference(brw->gs.base.scratch_bo);
1190 brw_bo_unreference(brw->wm.base.scratch_bo);
1191
1192 brw_bo_unreference(brw->vs.base.push_const_bo);
1193 brw_bo_unreference(brw->tcs.base.push_const_bo);
1194 brw_bo_unreference(brw->tes.base.push_const_bo);
1195 brw_bo_unreference(brw->gs.base.push_const_bo);
1196 brw_bo_unreference(brw->wm.base.push_const_bo);
1197
1198 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1199
1200 if (ctx->swrast_context) {
1201 _swsetup_DestroyContext(&brw->ctx);
1202 _tnl_DestroyContext(&brw->ctx);
1203 }
1204 _vbo_DestroyContext(&brw->ctx);
1205
1206 if (ctx->swrast_context)
1207 _swrast_DestroyContext(&brw->ctx);
1208
1209 brw_fini_pipe_control(brw);
1210 intel_batchbuffer_free(&brw->batch);
1211
1212 brw_bo_unreference(brw->throttle_batch[1]);
1213 brw_bo_unreference(brw->throttle_batch[0]);
1214 brw->throttle_batch[1] = NULL;
1215 brw->throttle_batch[0] = NULL;
1216
1217 driDestroyOptionCache(&brw->optionCache);
1218
1219 /* free the Mesa context */
1220 _mesa_free_context_data(&brw->ctx);
1221
1222 ralloc_free(brw);
1223 driContextPriv->driverPrivate = NULL;
1224 }
1225
1226 GLboolean
1227 intelUnbindContext(__DRIcontext * driContextPriv)
1228 {
1229 struct gl_context *ctx = driContextPriv->driverPrivate;
1230 _mesa_glthread_finish(ctx);
1231
1232 /* Unset current context and dispatch table */
1233 _mesa_make_current(NULL, NULL, NULL);
1234
1235 return true;
1236 }
1237
1238 /**
1239 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1240 * on window system framebuffers.
1241 *
1242 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1243 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1244 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1245 * for a visual where you're guaranteed to be capable, but it turns out that
1246 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1247 * incapable ones, because there's no difference between the two in resources
1248 * used. Applications thus get built that accidentally rely on the default
1249 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1250 * great...
1251 *
1252 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1253 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1254 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1255 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1256 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1257 * and get no sRGB encode (assuming that both kinds of visual are available).
1258 * Thus our choice to support sRGB by default on our visuals for desktop would
1259 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1260 *
1261 * Unfortunately, renderbuffer setup happens before a context is created. So
1262 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1263 * context (without an sRGB visual), we go turn that back off before anyone
1264 * finds out.
1265 */
1266 static void
1267 intel_gles3_srgb_workaround(struct brw_context *brw,
1268 struct gl_framebuffer *fb)
1269 {
1270 struct gl_context *ctx = &brw->ctx;
1271
1272 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1273 return;
1274
1275 for (int i = 0; i < BUFFER_COUNT; i++) {
1276 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1277
1278 /* Check if sRGB was specifically asked for. */
1279 struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1280 if (irb && irb->need_srgb)
1281 return;
1282
1283 if (rb)
1284 rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1285 }
1286 /* Disable sRGB from framebuffers that are not compatible. */
1287 fb->Visual.sRGBCapable = false;
1288 }
1289
1290 GLboolean
1291 intelMakeCurrent(__DRIcontext * driContextPriv,
1292 __DRIdrawable * driDrawPriv,
1293 __DRIdrawable * driReadPriv)
1294 {
1295 struct brw_context *brw;
1296
1297 if (driContextPriv)
1298 brw = (struct brw_context *) driContextPriv->driverPrivate;
1299 else
1300 brw = NULL;
1301
1302 if (driContextPriv) {
1303 struct gl_context *ctx = &brw->ctx;
1304 struct gl_framebuffer *fb, *readFb;
1305
1306 if (driDrawPriv == NULL) {
1307 fb = _mesa_get_incomplete_framebuffer();
1308 } else {
1309 fb = driDrawPriv->driverPrivate;
1310 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1311 }
1312
1313 if (driReadPriv == NULL) {
1314 readFb = _mesa_get_incomplete_framebuffer();
1315 } else {
1316 readFb = driReadPriv->driverPrivate;
1317 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1318 }
1319
1320 /* The sRGB workaround changes the renderbuffer's format. We must change
1321 * the format before the renderbuffer's miptree gets allocated, otherwise
1322 * the formats of the renderbuffer and its miptree will differ.
1323 */
1324 intel_gles3_srgb_workaround(brw, fb);
1325 intel_gles3_srgb_workaround(brw, readFb);
1326
1327 /* If the context viewport hasn't been initialized, force a call out to
1328 * the loader to get buffers so we have a drawable size for the initial
1329 * viewport. */
1330 if (!brw->ctx.ViewportInitialized)
1331 intel_prepare_render(brw);
1332
1333 _mesa_make_current(ctx, fb, readFb);
1334 } else {
1335 GET_CURRENT_CONTEXT(ctx);
1336 _mesa_glthread_finish(ctx);
1337 _mesa_make_current(NULL, NULL, NULL);
1338 }
1339
1340 return true;
1341 }
1342
1343 void
1344 intel_resolve_for_dri2_flush(struct brw_context *brw,
1345 __DRIdrawable *drawable)
1346 {
1347 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1348
1349 if (devinfo->gen < 6) {
1350 /* MSAA and fast color clear are not supported, so don't waste time
1351 * checking whether a resolve is needed.
1352 */
1353 return;
1354 }
1355
1356 struct gl_framebuffer *fb = drawable->driverPrivate;
1357 struct intel_renderbuffer *rb;
1358
1359 /* Usually, only the back buffer will need to be downsampled. However,
1360 * the front buffer will also need it if the user has rendered into it.
1361 */
1362 static const gl_buffer_index buffers[2] = {
1363 BUFFER_BACK_LEFT,
1364 BUFFER_FRONT_LEFT,
1365 };
1366
1367 for (int i = 0; i < 2; ++i) {
1368 rb = intel_get_renderbuffer(fb, buffers[i]);
1369 if (rb == NULL || rb->mt == NULL)
1370 continue;
1371 if (rb->mt->surf.samples == 1) {
1372 assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1373 rb->layer_count == 1);
1374 intel_miptree_prepare_external(brw, rb->mt);
1375 } else {
1376 intel_renderbuffer_downsample(brw, rb);
1377
1378 /* Call prepare_external on the single-sample miptree to do any
1379 * needed resolves prior to handing it off to the window system.
1380 * This is needed in the case that rb->singlesample_mt is Y-tiled
1381 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
1382 * this case, the MSAA resolve above will write compressed data into
1383 * rb->singlesample_mt.
1384 *
1385 * TODO: Some day, if we decide to care about the tiny performance
1386 * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1387 * we could detect this case and just allocate the single-sampled
1388 * miptree without aux. However, that would be a lot of plumbing and
1389 * this is a rather exotic case so it's not really worth it.
1390 */
1391 intel_miptree_prepare_external(brw, rb->singlesample_mt);
1392 }
1393 }
1394 }
1395
1396 static unsigned
1397 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1398 {
1399 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1400 }
1401
1402 static void
1403 intel_query_dri2_buffers(struct brw_context *brw,
1404 __DRIdrawable *drawable,
1405 __DRIbuffer **buffers,
1406 int *count);
1407
1408 static void
1409 intel_process_dri2_buffer(struct brw_context *brw,
1410 __DRIdrawable *drawable,
1411 __DRIbuffer *buffer,
1412 struct intel_renderbuffer *rb,
1413 const char *buffer_name);
1414
1415 static void
1416 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1417
1418 static void
1419 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1420 {
1421 struct gl_framebuffer *fb = drawable->driverPrivate;
1422 struct intel_renderbuffer *rb;
1423 __DRIbuffer *buffers = NULL;
1424 int count;
1425 const char *region_name;
1426
1427 /* Set this up front, so that in case our buffers get invalidated
1428 * while we're getting new buffers, we don't clobber the stamp and
1429 * thus ignore the invalidate. */
1430 drawable->lastStamp = drawable->dri2.stamp;
1431
1432 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1433 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1434
1435 intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1436
1437 if (buffers == NULL)
1438 return;
1439
1440 for (int i = 0; i < count; i++) {
1441 switch (buffers[i].attachment) {
1442 case __DRI_BUFFER_FRONT_LEFT:
1443 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1444 region_name = "dri2 front buffer";
1445 break;
1446
1447 case __DRI_BUFFER_FAKE_FRONT_LEFT:
1448 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1449 region_name = "dri2 fake front buffer";
1450 break;
1451
1452 case __DRI_BUFFER_BACK_LEFT:
1453 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1454 region_name = "dri2 back buffer";
1455 break;
1456
1457 case __DRI_BUFFER_DEPTH:
1458 case __DRI_BUFFER_HIZ:
1459 case __DRI_BUFFER_DEPTH_STENCIL:
1460 case __DRI_BUFFER_STENCIL:
1461 case __DRI_BUFFER_ACCUM:
1462 default:
1463 fprintf(stderr,
1464 "unhandled buffer attach event, attachment type %d\n",
1465 buffers[i].attachment);
1466 return;
1467 }
1468
1469 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1470 }
1471
1472 }
1473
1474 void
1475 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1476 {
1477 struct brw_context *brw = context->driverPrivate;
1478 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1479
1480 /* Set this up front, so that in case our buffers get invalidated
1481 * while we're getting new buffers, we don't clobber the stamp and
1482 * thus ignore the invalidate. */
1483 drawable->lastStamp = drawable->dri2.stamp;
1484
1485 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1486 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1487
1488 if (dri_screen->image.loader)
1489 intel_update_image_buffers(brw, drawable);
1490 else
1491 intel_update_dri2_buffers(brw, drawable);
1492
1493 driUpdateFramebufferSize(&brw->ctx, drawable);
1494 }
1495
1496 /**
1497 * intel_prepare_render should be called anywhere that current read/drawbuffer
1498 * state is required.
1499 */
1500 void
1501 intel_prepare_render(struct brw_context *brw)
1502 {
1503 struct gl_context *ctx = &brw->ctx;
1504 __DRIcontext *driContext = brw->driContext;
1505 __DRIdrawable *drawable;
1506
1507 drawable = driContext->driDrawablePriv;
1508 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1509 if (drawable->lastStamp != drawable->dri2.stamp)
1510 intel_update_renderbuffers(driContext, drawable);
1511 driContext->dri2.draw_stamp = drawable->dri2.stamp;
1512 }
1513
1514 drawable = driContext->driReadablePriv;
1515 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1516 if (drawable->lastStamp != drawable->dri2.stamp)
1517 intel_update_renderbuffers(driContext, drawable);
1518 driContext->dri2.read_stamp = drawable->dri2.stamp;
1519 }
1520
1521 /* If we're currently rendering to the front buffer, the rendering
1522 * that will happen next will probably dirty the front buffer. So
1523 * mark it as dirty here.
1524 */
1525 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer) &&
1526 ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) {
1527 brw->front_buffer_dirty = true;
1528 }
1529
1530 if (brw->is_shared_buffer_bound) {
1531 /* Subsequent rendering will probably dirty the shared buffer. */
1532 brw->is_shared_buffer_dirty = true;
1533 }
1534 }
1535
1536 /**
1537 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1538 *
1539 * To determine which DRI buffers to request, examine the renderbuffers
1540 * attached to the drawable's framebuffer. Then request the buffers with
1541 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1542 *
1543 * This is called from intel_update_renderbuffers().
1544 *
1545 * \param drawable Drawable whose buffers are queried.
1546 * \param buffers [out] List of buffers returned by DRI2 query.
1547 * \param buffer_count [out] Number of buffers returned.
1548 *
1549 * \see intel_update_renderbuffers()
1550 * \see DRI2GetBuffers()
1551 * \see DRI2GetBuffersWithFormat()
1552 */
1553 static void
1554 intel_query_dri2_buffers(struct brw_context *brw,
1555 __DRIdrawable *drawable,
1556 __DRIbuffer **buffers,
1557 int *buffer_count)
1558 {
1559 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1560 struct gl_framebuffer *fb = drawable->driverPrivate;
1561 int i = 0;
1562 unsigned attachments[8];
1563
1564 struct intel_renderbuffer *front_rb;
1565 struct intel_renderbuffer *back_rb;
1566
1567 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1568 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1569
1570 memset(attachments, 0, sizeof(attachments));
1571 if ((_mesa_is_front_buffer_drawing(fb) ||
1572 _mesa_is_front_buffer_reading(fb) ||
1573 !back_rb) && front_rb) {
1574 /* If a fake front buffer is in use, then querying for
1575 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1576 * the real front buffer to the fake front buffer. So before doing the
1577 * query, we need to make sure all the pending drawing has landed in the
1578 * real front buffer.
1579 */
1580 intel_batchbuffer_flush(brw);
1581 intel_flush_front(&brw->ctx);
1582
1583 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1584 attachments[i++] = intel_bits_per_pixel(front_rb);
1585 } else if (front_rb && brw->front_buffer_dirty) {
1586 /* We have pending front buffer rendering, but we aren't querying for a
1587 * front buffer. If the front buffer we have is a fake front buffer,
1588 * the X server is going to throw it away when it processes the query.
1589 * So before doing the query, make sure all the pending drawing has
1590 * landed in the real front buffer.
1591 */
1592 intel_batchbuffer_flush(brw);
1593 intel_flush_front(&brw->ctx);
1594 }
1595
1596 if (back_rb) {
1597 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1598 attachments[i++] = intel_bits_per_pixel(back_rb);
1599 }
1600
1601 assert(i <= ARRAY_SIZE(attachments));
1602
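   /* attachments[] holds (attachment token, bits-per-pixel) pairs, so the
    * loader is asked for i / 2 attachments.
    */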
1603 *buffers =
1604 dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1605 &drawable->w,
1606 &drawable->h,
1607 attachments, i / 2,
1608 buffer_count,
1609 drawable->loaderPrivate);
1610 }
1611
1612 /**
1613 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1614 *
1615 * This is called from intel_update_renderbuffers().
1616 *
1617 * \par Note:
1618 * DRI buffers whose attachment point is DRI2BufferStencil or
1619 * DRI2BufferDepthStencil are handled as special cases.
1620 *
1621 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1622 * that is passed to brw_bo_gem_create_from_name().
1623 *
1624 * \see intel_update_renderbuffers()
1625 */
1626 static void
1627 intel_process_dri2_buffer(struct brw_context *brw,
1628 __DRIdrawable *drawable,
1629 __DRIbuffer *buffer,
1630 struct intel_renderbuffer *rb,
1631 const char *buffer_name)
1632 {
1633 struct gl_framebuffer *fb = drawable->driverPrivate;
1634 struct brw_bo *bo;
1635
1636 if (!rb)
1637 return;
1638
1639 unsigned num_samples = rb->Base.Base.NumSamples;
1640
1641 /* We try to avoid closing and reopening the same BO name, because the first
1642 * use of a mapping of the buffer involves a bunch of page faulting which is
1643 * moderately expensive.
1644 */
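   /* For a multisampled renderbuffer the window-system BO backs the
    * single-sampled resolve miptree, so that is the one to compare against.
    */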
1645 struct intel_mipmap_tree *last_mt;
1646 if (num_samples == 0)
1647 last_mt = rb->mt;
1648 else
1649 last_mt = rb->singlesample_mt;
1650
1651 uint32_t old_name = 0;
1652 if (last_mt) {
1653 /* The bo already has a name because the miptree was created by a
1654 * previous call to intel_process_dri2_buffer(). If a bo already has a
1655 * name, then brw_bo_flink() is a low-cost getter. It does not
1656 * create a new name.
1657 */
1658 brw_bo_flink(last_mt->bo, &old_name);
1659 }
1660
1661 if (old_name == buffer->name)
1662 return;
1663
1664 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1665 fprintf(stderr,
1666 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1667 buffer->name, buffer->attachment,
1668 buffer->cpp, buffer->pitch);
1669 }
1670
1671 bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1672 buffer->name);
1673 if (!bo) {
1674 fprintf(stderr,
1675 "Failed to open BO for returned DRI2 buffer "
1676 "(%dx%d, %s, named %d).\n"
1677 "This is likely a bug in the X Server that will lead to a "
1678 "crash soon.\n",
1679 drawable->w, drawable->h, buffer_name, buffer->name);
1680 return;
1681 }
1682
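   /* Ask the kernel how the flink'd BO is tiled; the i915 tiling mode is
    * translated to an ISL tiling below when the BO is wrapped in a miptree.
    */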
1683 uint32_t tiling, swizzle;
1684 brw_bo_get_tiling(bo, &tiling, &swizzle);
1685
1686 struct intel_mipmap_tree *mt =
1687 intel_miptree_create_for_bo(brw,
1688 bo,
1689 intel_rb_format(rb),
1690 0,
1691 drawable->w,
1692 drawable->h,
1693 1,
1694 buffer->pitch,
1695 isl_tiling_from_i915_tiling(tiling),
1696 MIPTREE_CREATE_DEFAULT);
1697 if (!mt) {
1698 brw_bo_unreference(bo);
1699 return;
1700 }
1701
1702    /* We got this BO from X11. We can't assume that we have coherent texture
1703 * access because X may suddenly decide to use it for scan-out which would
1704 * destroy coherency.
1705 */
1706 bo->cache_coherent = false;
1707
1708 if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1709 drawable->w, drawable->h,
1710 buffer->pitch)) {
1711 brw_bo_unreference(bo);
1712 intel_miptree_release(&mt);
1713 return;
1714 }
1715
1716 if (_mesa_is_front_buffer_drawing(fb) &&
1717 (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1718 buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1719 rb->Base.Base.NumSamples > 1) {
1720 intel_renderbuffer_upsample(brw, rb);
1721 }
1722
1723 assert(rb->mt);
1724
1725 brw_bo_unreference(bo);
1726 }
1727
1728 /**
1729 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1730 *
1731 * To determine which DRI buffers to request, examine the renderbuffers
1732 * attached to the drawable's framebuffer. Then request the buffers from
1733  * the image loader.
1734 *
1735 * This is called from intel_update_renderbuffers().
1736 *
1737 * \param drawable Drawable whose buffers are queried.
1738  * \param buffers [out] List of buffers returned by the image loader.
1739 * \param buffer_count [out] Number of buffers returned.
1740 *
1741 * \see intel_update_renderbuffers()
1742 */
1743
1744 static void
1745 intel_update_image_buffer(struct brw_context *intel,
1746 __DRIdrawable *drawable,
1747 struct intel_renderbuffer *rb,
1748 __DRIimage *buffer,
1749 enum __DRIimageBufferMask buffer_type)
1750 {
1751 struct gl_framebuffer *fb = drawable->driverPrivate;
1752
1753 if (!rb || !buffer->bo)
1754 return;
1755
1756 unsigned num_samples = rb->Base.Base.NumSamples;
1757
1758 /* Check and see if we're already bound to the right
1759     * buffer object.
1760 */
1761 struct intel_mipmap_tree *last_mt;
1762 if (num_samples == 0)
1763 last_mt = rb->mt;
1764 else
1765 last_mt = rb->singlesample_mt;
1766
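   /* Already bound to this BO: nothing to rebuild.  A drawable that has just
    * entered shared-buffer mode still needs its aux data resolved and
    * disabled, though (see the longer comment below).
    */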
1767 if (last_mt && last_mt->bo == buffer->bo) {
1768 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1769 intel_miptree_make_shareable(intel, last_mt);
1770 }
1771 return;
1772 }
1773
1774 /* Only allow internal compression if samples == 0. For multisampled
1775 * window system buffers, the only thing the single-sampled buffer is used
1776 * for is as a resolve target. If we do any compression beyond what is
1777 * supported by the window system, we will just have to resolve so it's
1778 * probably better to just not bother.
1779 */
1780 const bool allow_internal_aux = (num_samples == 0);
1781
1782 struct intel_mipmap_tree *mt =
1783 intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1784 intel_rb_format(rb),
1785 allow_internal_aux);
1786 if (!mt)
1787 return;
1788
1789 if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1790 buffer->width, buffer->height,
1791 buffer->pitch)) {
1792 intel_miptree_release(&mt);
1793 return;
1794 }
1795
1796 if (_mesa_is_front_buffer_drawing(fb) &&
1797 buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1798 rb->Base.Base.NumSamples > 1) {
1799 intel_renderbuffer_upsample(intel, rb);
1800 }
1801
1802 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1803 /* The compositor and the application may access this image
1804  * concurrently. The display hardware may even scan out the image while
1805 * the GPU is rendering to it. Aux surfaces cause difficulty with
1806 * concurrent access, so permanently disable aux for this miptree.
1807 *
1808 * Perhaps we could improve overall application performance by
1809 * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1810 * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1811 * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1812 * approach to be highly dependent on the application's GL usage.
1813 *
1814 * I [chadv] expect clever disabling/reenabling to be counterproductive
1815 * in the use cases I care about: applications that render nearly
1816  * realtime handwriting to the surface while possibly undergoing
1817  * simultaneous scanout as a display plane. The app requires low
1818 * render latency. Even though the app spends most of its time in
1819 * shared-buffer mode, it also frequently transitions between
1820 * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1821  * mode. Visual stutter during the transitions should be avoided.
1822 *
1823 * In this case, I [chadv] believe reducing the GPU workload at
1824 * shared-buffer/double-buffer transitions would offer a smoother app
1825 * experience than any savings due to aux compression. But I've
1826 * collected no data to prove my theory.
1827 */
1828 intel_miptree_make_shareable(intel, mt);
1829 }
1830 }
1831
1832 static void
1833 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1834 {
1835 struct gl_framebuffer *fb = drawable->driverPrivate;
1836 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1837 struct intel_renderbuffer *front_rb;
1838 struct intel_renderbuffer *back_rb;
1839 struct __DRIimageList images;
1840 mesa_format format;
1841 uint32_t buffer_mask = 0;
1842 int ret;
1843
1844 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1845 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1846
1847 if (back_rb)
1848 format = intel_rb_format(back_rb);
1849 else if (front_rb)
1850 format = intel_rb_format(front_rb);
1851 else
1852 return;
1853
1854 if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1855 _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1856 buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1857 }
1858
1859 if (back_rb)
1860 buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1861
1862 ret = dri_screen->image.loader->getBuffers(drawable,
1863 driGLFormatToImageFormat(format),
1864 &drawable->dri2.stamp,
1865 drawable->loaderPrivate,
1866 buffer_mask,
1867 &images);
1868 if (!ret)
1869 return;
1870
1871 if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1872 drawable->w = images.front->width;
1873 drawable->h = images.front->height;
1874 intel_update_image_buffer(brw,
1875 drawable,
1876 front_rb,
1877 images.front,
1878 __DRI_IMAGE_BUFFER_FRONT);
1879 }
1880
1881 if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1882 drawable->w = images.back->width;
1883 drawable->h = images.back->height;
1884 intel_update_image_buffer(brw,
1885 drawable,
1886 back_rb,
1887 images.back,
1888 __DRI_IMAGE_BUFFER_BACK);
1889 }
1890
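   /* In shared-buffer (EGL_SINGLE_BUFFER) mode the loader returns a single
    * __DRI_IMAGE_BUFFER_SHARED image in place of a back buffer.  It is
    * attached to the back renderbuffer, and is_shared_buffer_bound is set so
    * that intel_prepare_render marks it dirty before subsequent rendering.
    */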
1891 if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1892 assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
1893 drawable->w = images.back->width;
1894 drawable->h = images.back->height;
1895 intel_update_image_buffer(brw,
1896 drawable,
1897 back_rb,
1898 images.back,
1899 __DRI_IMAGE_BUFFER_SHARED);
1900 brw->is_shared_buffer_bound = true;
1901 } else {
1902 brw->is_shared_buffer_bound = false;
1903 brw->is_shared_buffer_dirty = false;
1904 }
1905 }