/* src/mesa/drivers/dri/i965/brw_context.c */
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 9:
      samples[0] = 16;
      samples[1] = 8;
      samples[2] = 4;
      samples[3] = 2;
      return 4;

   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(brw->gen < 6);
      samples[0] = 1;
      return 1;
   }
}
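
/* Illustrative sketch (not driver code): from the application side, the
 * per-format sample counts returned above surface through
 * glGetInternalformativ, e.g.:
 *
 *    GLint num = 0, counts[16];
 *    glGetInternalformativ(GL_RENDERBUFFER, GL_RGBA8,
 *                          GL_NUM_SAMPLE_COUNTS, 1, &num);
 *    glGetInternalformativ(GL_RENDERBUFFER, GL_RGBA8,
 *                          GL_SAMPLES, num, counts);
 *
 * On a Gen8 part this would report {8, 4, 2}, per the switch above.
 */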

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}
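
/* A hypothetical resulting GL_RENDERER string would look like
 * "Mesa DRI Intel(R) Haswell Mobile": driGetRendererString() prefixes
 * "Mesa DRI " to the chipset name selected above.
 */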

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples
    */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->intelScreen,
                                 fb->DefaultGeometry.NumSamples);
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      /* The sampling engine understands lossless compression, so resolving
       * those surfaces should be skipped for performance reasons.
       */
      intel_miptree_resolve_color(brw, tex_obj->mt,
                                  INTEL_MIPTREE_IGNORE_CCS_E);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;

      if (unlikely(shader && shader->NumImages)) {
         for (unsigned j = 0; j < shader->NumImages; j++) {
            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               /* Access to images is implemented using indirect messages
                * against the data port.  Normal render target writes
                * understand lossless compression, but unfortunately the
                * typed/untyped read/write interface doesn't.  Therefore
                * compressed surfaces need to be resolved prior to accessing
                * them.
                */
               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
    * the single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats.  This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with
    * the linear equivalent format anyway.
    */
   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
      struct gl_framebuffer *fb = ctx->DrawBuffer;
      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (rb == NULL)
            continue;

         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         struct intel_mipmap_tree *mt = irb->mt;

         if (mt == NULL ||
             mt->num_samples > 1 ||
             _mesa_get_srgb_format_linear(mt->format) == mt->format)
            continue;

         /* Lossless compression is not supported for SRGB formats, so it
          * should be impossible to get here with such surfaces.
          */
         assert(!intel_miptree_is_lossless_compressed(brw, mt));
         intel_miptree_resolve_color(brw, mt, 0);
         brw_render_cache_set_check_flush(brw, mt->bo);
      }
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

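/* flushFront: select the front-buffer flush hook from whichever loader
 * interface the screen is using, the image loader when available,
 * otherwise the classic DRI2 loader.
 */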
#define flushFront(screen) \
   ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer \
                           : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both the front and back buffers.
          * It is unnecessary to resolve the back, but it harms nothing
          * except performance.  And no one cares about front-buffer
          * render performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (ctx->API == API_OPENGL_CORE &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };
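   /* Note on the MESA_SHADER_COMPUTE entry above: the work-group limits it
    * checks were filled in earlier by brw_initialize_cs_context_constants()
    * (see below), so compute support is keyed off whether the hardware can
    * meet the minimum work-group size each API requires.  The last clause
    * lets a MESA_EXTENSION_OVERRIDE of ARB_compute_shader force it on.
    */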

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on
    * software limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a
    * value for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
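
   /* Worked example, assuming the values these macros had at the time
    * (BRW_MAX_SOL_BINDINGS == 64, BRW_MAX_SOL_BUFFERS == 4): interleaved
    * mode advertises 64 components, and separate mode advertises
    * 64 / 4 == 16 components per buffer, so even with all four buffers in
    * use at most 64 binding table entries are consumed.
    */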

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }
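
   /* A sketch of how a user would drive the option above: clamp_max_samples
    * comes from the drirc option cache, so an entry like the following in
    * ~/.drirc (values here are only an example) would cap MSAA at 4x:
    *
    *    <driconf>
    *       <device screen="0" driver="i965">
    *          <application name="Default">
    *             <option name="clamp_max_samples" value="4" />
    *          </application>
    *       </device>
    *    </driconf>
    */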

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of a rectangular grid to sample numbers within a pixel.
    * These variables are used by the GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation.  For more details see the comment above the
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number.  Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit two's-complement integers for all
    * integer types.  (The Range values below are log2 of the representable
    * magnitudes, as glGetShaderPrecisionFormat expects: 31 for -2^31 and
    * 30 for 2^31 - 1.)
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt =
      ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt =
      ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt =
      ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt =
      ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking vertex decision.  Always use the last-vertex
    * convention for quad primitives, which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate
    *    flag bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF,
    *    DWord 0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the
    *    register is used later as a vector of Booleans, as only LSB at
    *    each channel contains meaning [sic] data, software should make
    *    sure all higher bits are masked out (e.g. by 'and-ing' an [sic]
    *    0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and
    * fix the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;
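
   /* Worked example of the fixup above: with 32-bit two's-complement
    * arithmetic, -(result & 1) maps the 0/1 CMP results of Gen4/5 onto
    * the 0 / ~0 representation chosen here:
    *
    *    uint32_t b1 = -(1u & 1);   // 0xFFFFFFFF (true)
    *    uint32_t b0 = -(0u & 1);   // 0x00000000 (false)
    */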

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *     specifies the base address of the first element of the surface,
    *     computed in software by adding the surface base address to the
    *     byte offset of the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cache-coherent systems (our Atom CPUs).  With UBOs, the GPU never
    * writes, so there's no problem.  For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously, and that
    * will break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
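
   /* Sketch of the application-side contract for the alignments above
    * (hypothetical caller, not driver code): offsets passed to
    * glBindBufferRange must be rounded up to the queried alignment, e.g.
    *
    *    GLint align = 0;
    *    glGetIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &align);
    *    GLintptr offset = (user_offset + align - 1) & ~(GLintptr)(align - 1);
    *    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ssbo, offset, size);
    */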

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

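/* Derive the compute-shader limits from the device's thread count.  For
 * example (numbers illustrative only): with max_threads == 64 and the
 * SIMD16 assumption used for desktop GL below, a work group may contain
 * up to 64 * 16 == 1024 invocations.
 */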
static void
brw_initialize_cs_context_constants(struct brw_context *brw,
                                    unsigned max_threads)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true.  This allows us to run more test cases, and will
    * be required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

   const uint32_t max_invocations = simd_size * max_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HiZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query
       * objects, and also allows us to reduce how much state we have to
       * emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable
    * subset.  It turns out it's basically always 256MB, though some ancient
    * hardware was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;
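   /* With the 256 MiB estimate above, this caps single mappable objects at
    * 64 MiB.
    */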

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));
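   /* i.e. hardware binding tables stay off unless opted into from the
    * environment; a hypothetical invocation would be
    *
    *    INTEL_USE_HW_BT=1 ./my_gl_app
    *
    * (env_var_as_boolean() accepts the usual true/false spellings).
    */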

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable
 * behavior on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format.  We must
       * change the format before the renderbuffer's miptree gets allocated,
       * otherwise the formats of the renderbuffer and its miptree will
       * differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled.  However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt, 0);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current
 * read/drawbuffer state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image
       * from the real front buffer to the fake front buffer.  So before
       * doing the query, we need to make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for
       * a front buffer.  If the front buffer we have is a fake front
       * buffer, the X server is going to throw it away when it processes
       * the query.  So before doing the query, make sure all the pending
       * drawing has landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

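   /* attachments[] holds (attachment, bits-per-pixel) pairs, so the number
    * of logical attachments passed to the loader is i / 2.
    */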
   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the
    * first use of a mapping of the buffer involves a bunch of page faulting
    * which is moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has
       * a name, then drm_intel_bo_flink() is a low-cost getter.  It does
       * not create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by the image loader.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}