src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43 #include "main/texobj.h"
  44
  45 #include "vbo/vbo_context.h"
  46
  47 #include "drivers/common/driverfuncs.h"
  48 #include "drivers/common/meta.h"
  49 #include "utils.h"
  50
  51 #include "brw_context.h"
  52 #include "brw_defines.h"
  53 #include "brw_draw.h"
  54 #include "brw_state.h"
  55
  56 #include "intel_batchbuffer.h"
  57 #include "intel_buffer_objects.h"
  58 #include "intel_buffers.h"
  59 #include "intel_fbo.h"
  60 #include "intel_mipmap_tree.h"
  61 #include "intel_pixel.h"
  62 #include "intel_image.h"
  63 #include "intel_tex.h"
  64 #include "intel_tex_obj.h"
  65
  66 #include "swrast_setup/swrast_setup.h"
  67 #include "tnl/tnl.h"
  68 #include "tnl/t_pipeline.h"
  69 #include "util/ralloc.h"
  70
  71 #include "glsl/nir/nir.h"
  72
  73 /***************************************
  74  * Mesa's Driver Functions
  75  ***************************************/
  76
  77 static size_t
  78 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  79                              GLenum internalFormat, int samples[16])
  80 {
  81    struct brw_context *brw = brw_context(ctx);
  82
  83    (void) target;
  84
  85    switch (brw->gen) {
  86    case 9:
  87    case 8:
  88       samples[0] = 8;
  89       samples[1] = 4;
  90       samples[2] = 2;
  91       return 3;
  92
  93    case 7:
  94       samples[0] = 8;
  95       samples[1] = 4;
  96       return 2;
  97
  98    case 6:
  99       samples[0] = 4;
 100       return 1;
 101
 102    default:
 103       assert(brw->gen < 6);
 104       samples[0] = 1;
 105       return 1;
 106    }
 107 }
 108
 109 const char *const brw_vendor_string = "Intel Open Source Technology Center";
 110
 111 const char *
 112 brw_get_renderer_string(unsigned deviceID)
 113 {
 114    const char *chipset;
 115    static char buffer[128];
 116
 117    switch (deviceID) {
 118 #undef CHIPSET
 119 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
 120 #include "pci_ids/i965_pci_ids.h"
 121    default:
 122       chipset = "Unknown Intel Chipset";
 123       break;
 124    }
 125
 126    (void) driGetRendererString(buffer, chipset, 0);
 127    return buffer;
 128 }
 129
 130 static const GLubyte *
 131 intel_get_string(struct gl_context * ctx, GLenum name)
 132 {
 133    const struct brw_context *const brw = brw_context(ctx);
 134
 135    switch (name) {
 136    case GL_VENDOR:
 137       return (GLubyte *) brw_vendor_string;
 138
 139    case GL_RENDERER:
 140       return
 141          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 142
 143    default:
 144       return NULL;
 145    }
 146 }
 147
 148 static void
 149 intel_viewport(struct gl_context *ctx)
 150 {
 151    struct brw_context *brw = brw_context(ctx);
 152    __DRIcontext *driContext = brw->driContext;
 153
 154    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 155       dri2InvalidateDrawable(driContext->driDrawablePriv);
 156       dri2InvalidateDrawable(driContext->driReadablePriv);
 157    }
 158 }
 159
 160 static void
 161 intel_update_state(struct gl_context * ctx, GLuint new_state)
 162 {
 163    struct brw_context *brw = brw_context(ctx);
 164    struct intel_texture_object *tex_obj;
 165    struct intel_renderbuffer *depth_irb;
 166
 167    if (ctx->swrast_context)
 168       _swrast_InvalidateState(ctx, new_state);
 169    _vbo_InvalidateState(ctx, new_state);
 170
 171    brw->NewGLState |= new_state;
 172
 173    _mesa_unlock_context_textures(ctx);
 174
 175    /* Resolve the depth buffer's HiZ buffer. */
 176    depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
 177    if (depth_irb)
 178       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 179
 180    /* Resolve depth buffer and render cache of each enabled texture. */
 181    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
 182    for (int i = 0; i <= maxEnabledUnit; i++) {
 183       if (!ctx->Texture.Unit[i]._Current)
 184          continue;
 185       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
 186       if (!tex_obj || !tex_obj->mt)
 187          continue;
 188       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
 189       intel_miptree_resolve_color(brw, tex_obj->mt);
 190       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 191    }
 192
 193    _mesa_lock_context_textures(ctx);
 194 }
 195
 196 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 197
 198 static void
 199 intel_flush_front(struct gl_context *ctx)
 200 {
 201    struct brw_context *brw = brw_context(ctx);
 202    __DRIcontext *driContext = brw->driContext;
 203    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 204    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 205
 206    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 207       if (flushFront(screen) && driDrawable &&
 208           driDrawable->loaderPrivate) {
 209
 210          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 211           *
 212           * This potentially resolves both front and back buffer. It
 213           * is unnecessary to resolve the back, but harms nothing except
 214           * performance. And no one cares about front-buffer render
 215           * performance.
 216           */
 217          intel_resolve_for_dri2_flush(brw, driDrawable);
 218          intel_batchbuffer_flush(brw);
 219
 220          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 221
 222          /* We set the dirty bit in intel_prepare_render() if we're
 223           * front buffer rendering once we get there.
 224           */
 225          brw->front_buffer_dirty = false;
 226       }
 227    }
 228 }
 229
 230 static void
 231 intel_glFlush(struct gl_context *ctx)
 232 {
 233    struct brw_context *brw = brw_context(ctx);
 234
 235    intel_batchbuffer_flush(brw);
 236    intel_flush_front(ctx);
 237
 238    brw->need_flush_throttle = true;
 239 }
 240
 241 static void
 242 intel_finish(struct gl_context * ctx)
 243 {
 244    struct brw_context *brw = brw_context(ctx);
 245
 246    intel_glFlush(ctx);
 247
 248    if (brw->batch.last_bo)
 249       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 250 }
 251
 252 static void
 253 brw_init_driver_functions(struct brw_context *brw,
 254                           struct dd_function_table *functions)
 255 {
 256    _mesa_init_driver_functions(functions);
 257
 258    /* GLX uses DRI2 invalidate events to handle window resizing.
 259     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 260     * which doesn't provide a mechanism for snooping the event queues.
 261     *
 262     * So EGL still relies on viewport hacks to handle window resizing.
 263     * This should go away with DRI3000.
 264     */
 265    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 266       functions->Viewport = intel_viewport;
 267
 268    functions->Flush = intel_glFlush;
 269    functions->Finish = intel_finish;
 270    functions->GetString = intel_get_string;
 271    functions->UpdateState = intel_update_state;
 272
 273    intelInitTextureFuncs(functions);
 274    intelInitTextureImageFuncs(functions);
 275    intelInitTextureSubImageFuncs(functions);
 276    intelInitTextureCopyImageFuncs(functions);
 277    intelInitCopyImageFuncs(functions);
 278    intelInitClearFuncs(functions);
 279    intelInitBufferFuncs(functions);
 280    intelInitPixelFuncs(functions);
 281    intelInitBufferObjectFuncs(functions);
 282    intel_init_syncobj_functions(functions);
 283    brw_init_object_purgeable_functions(functions);
 284
 285    brwInitFragProgFuncs( functions );
 286    brw_init_common_queryobj_functions(functions);
 287    if (brw->gen >= 6)
 288       gen6_init_queryobj_functions(functions);
 289    else
 290       gen4_init_queryobj_functions(functions);
 291    brw_init_compute_functions(functions);
 292
 293    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 294
 295    functions->NewTransformFeedback = brw_new_transform_feedback;
 296    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 297    functions->GetTransformFeedbackVertexCount =
 298       brw_get_transform_feedback_vertex_count;
 299    if (brw->gen >= 7) {
 300       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 301       functions->EndTransformFeedback = gen7_end_transform_feedback;
 302       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 303       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 304    } else {
 305       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 306       functions->EndTransformFeedback = brw_end_transform_feedback;
 307    }
 308
 309    if (brw->gen >= 6)
 310       functions->GetSamplePosition = gen6_get_sample_position;
 311 }
 312
 313 static void
 314 brw_initialize_context_constants(struct brw_context *brw)
 315 {
 316    struct gl_context *ctx = &brw->ctx;
 317
 318    unsigned max_samplers =
 319       brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
 320
 321    ctx->Const.QueryCounterBits.Timestamp = 36;
 322
 323    ctx->Const.StripTextureBorder = true;
 324
 325    ctx->Const.MaxDualSourceDrawBuffers = 1;
 326    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 327    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
 328    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 329    ctx->Const.MaxTextureUnits =
 330       MIN2(ctx->Const.MaxTextureCoordUnits,
 331            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 332    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
 333    if (brw->gen >= 6)
 334       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
 335    else
 336       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
 337    if (_mesa_extension_override_enables.ARB_compute_shader) {
 338       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
 339       ctx->Const.MaxUniformBufferBindings += 12;
 340    } else {
 341       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
 342    }
 343    ctx->Const.MaxCombinedTextureImageUnits =
 344       ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
 345       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
 346       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
 347       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
 348
 349    ctx->Const.MaxTextureLevels = 14; /* 8192 */
 350    if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
 351       ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
 352    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
 353    ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
 354    ctx->Const.MaxTextureMbytes = 1536;
 355
 356    if (brw->gen >= 7)
 357       ctx->Const.MaxArrayTextureLayers = 2048;
 358    else
 359       ctx->Const.MaxArrayTextureLayers = 512;
 360
 361    ctx->Const.MaxTextureRectSize = 1 << 12;
 362
 363    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 364
 365    ctx->Const.MaxRenderbufferSize = 8192;
 366
 367    /* Hardware only supports a limited number of transform feedback buffers.
 368     * So we need to override the Mesa default (which is based only on software
 369     * limits).
 370     */
 371    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 372
 373    /* On Gen6, in the worst case, we use up one binding table entry per
 374     * transform feedback component (see comments above the definition of
 375     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
 376     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
 377     * BRW_MAX_SOL_BINDINGS.
 378     *
 379     * In "separate components" mode, we need to divide this value by
 380     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
 381     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
 382     */
 383    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
 384    ctx->Const.MaxTransformFeedbackSeparateComponents =
 385       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 386
 387    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
 388
 389    int max_samples;
 390    const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
 391    const int clamp_max_samples =
 392       driQueryOptioni(&brw->optionCache, "clamp_max_samples");
 393
 394    if (clamp_max_samples < 0) {
 395       max_samples = msaa_modes[0];
 396    } else {
 397       /* Select the largest supported MSAA mode that does not exceed
 398        * clamp_max_samples.
 399        */
 400       max_samples = 0;
 401       for (int i = 0; msaa_modes[i] != 0; ++i) {
 402          if (msaa_modes[i] <= clamp_max_samples) {
 403             max_samples = msaa_modes[i];
 404             break;
 405          }
 406       }
 407    }
 408
 409    ctx->Const.MaxSamples = max_samples;
 410    ctx->Const.MaxColorTextureSamples = max_samples;
 411    ctx->Const.MaxDepthTextureSamples = max_samples;
 412    ctx->Const.MaxIntegerSamples = max_samples;
 413
 414    /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
 415     * to map indices of rectangular grid to sample numbers within a pixel.
 416     * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
 417     * extension implementation. For more details see the comment above
 418     * gen6_set_sample_maps() definition.
 419     */
 420    gen6_set_sample_maps(ctx);
 421
 422    if (brw->gen >= 7)
 423       ctx->Const.MaxProgramTextureGatherComponents = 4;
 424    else if (brw->gen == 6)
 425       ctx->Const.MaxProgramTextureGatherComponents = 1;
 426
 427    ctx->Const.MinLineWidth = 1.0;
 428    ctx->Const.MinLineWidthAA = 1.0;
 429    if (brw->gen >= 9 || brw->is_cherryview) {
 430       ctx->Const.MaxLineWidth = 40.0;
 431       ctx->Const.MaxLineWidthAA = 40.0;
 432       ctx->Const.LineWidthGranularity = 0.125;
 433    } else if (brw->gen >= 6) {
 434       ctx->Const.MaxLineWidth = 7.375;
 435       ctx->Const.MaxLineWidthAA = 7.375;
 436       ctx->Const.LineWidthGranularity = 0.125;
 437    } else {
 438       ctx->Const.MaxLineWidth = 7.0;
 439       ctx->Const.MaxLineWidthAA = 7.0;
 440       ctx->Const.LineWidthGranularity = 0.5;
 441    }
 442
 443    ctx->Const.MinPointSize = 1.0;
 444    ctx->Const.MinPointSizeAA = 1.0;
 445    ctx->Const.MaxPointSize = 255.0;
 446    ctx->Const.MaxPointSizeAA = 255.0;
 447    ctx->Const.PointSizeGranularity = 1.0;
 448
 449    if (brw->gen >= 5 || brw->is_g4x)
 450       ctx->Const.MaxClipPlanes = 8;
 451
 452    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
 453    ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
 454    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
 455    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
 456    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
 457    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
 458    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
 459    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
 460    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
 461    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
 462    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
 463    ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
 464       MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
 465            ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
 466
 467    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
 468    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
 469    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
 470    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
 471    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
 472    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
 473    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
 474    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
 475    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
 476       MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
 477            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
 478
 479    /* Fragment shaders use real, 32-bit twos-complement integers for all
 480     * integer types.
 481     */
 482    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
 483    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
 484    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
 485    ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 486    ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 487
 488    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
 489    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
 490    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
 491    ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 492    ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 493
 494    if (brw->gen >= 7) {
 495       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 496       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 497       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 498       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 499       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
 500       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
 501       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
 502       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
 503       ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
 504    }
 505
 506    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
 507     * but we're not sure how it's actually done for vertex order,
 508     * that affect provoking vertex decision. Always use last vertex
 509     * convention for quad primitive which works as expected for now.
 510     */
 511    if (brw->gen >= 6)
 512       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 513
 514    ctx->Const.NativeIntegers = true;
 515    ctx->Const.VertexID_is_zero_based = true;
 516
 517    /* Regarding the CMP instruction, the Ivybridge PRM says:
 518     *
 519     *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
 520     *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
 521     *    0xFFFFFFFF) is assigned to dst."
 522     *
 523     * but PRMs for earlier generations say
 524     *
 525     *   "In dword format, one GRF may store up to 8 results. When the register
 526     *    is used later as a vector of Booleans, as only LSB at each channel
 527     *    contains meaning [sic] data, software should make sure all higher bits
 528     *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
 529     *
 530     * We select the representation of a true boolean uniform to be ~0, and fix
 531     * the results of Gen <= 5 CMP instruction's with -(result & 1).
 532     */
 533    ctx->Const.UniformBooleanTrue = ~0;
 534
 535    /* From the gen4 PRM, volume 4 page 127:
 536     *
 537     *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
 538     *      the base address of the first element of the surface, computed in
 539     *      software by adding the surface base address to the byte offset of
 540     *      the element in the buffer."
 541     *
 542     * However, unaligned accesses are slower, so enforce buffer alignment.
 543     */
 544    ctx->Const.UniformBufferOffsetAlignment = 16;
 545    ctx->Const.TextureBufferOffsetAlignment = 16;
 546
 547    if (brw->gen >= 6) {
 548       ctx->Const.MaxVarying = 32;
 549       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
 550       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
 551       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
 552       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
 553    }
 554
 555    static const nir_shader_compiler_options nir_options = {
 556       .native_integers = true,
 557       /* In order to help allow for better CSE at the NIR level we tell NIR
 558        * to split all ffma instructions during opt_algebraic and we then
 559        * re-combine them as a later step.
 560        */
 561       .lower_ffma = true,
 562       .lower_sub = true,
 563    };
 564
 565    /* We want the GLSL compiler to emit code that uses condition codes */
 566    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 567       ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
 568       ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
 569       ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
 570       ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
 571       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
 572       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
 573          (i == MESA_SHADER_FRAGMENT);
 574       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
 575          (i == MESA_SHADER_FRAGMENT);
 576       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
 577       ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
 578    }
 579
 580    ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
 581    ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
 582
 583    if (brw->scalar_vs) {
 584       /* If we're using the scalar backend for vertex shaders, we need to
 585        * configure these accordingly.
 586        */
 587       ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
 588       ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
 589       ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
 590
 591       if (brw_env_var_as_boolean("INTEL_USE_NIR", false))
 592          ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options;
 593    }
 594
 595    if (brw_env_var_as_boolean("INTEL_USE_NIR", true))
 596       ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options;
 597
 598    ctx->Const.ShaderCompilerOptions[MESA_SHADER_COMPUTE].NirOptions = &nir_options;
 599
 600    /* ARB_viewport_array */
 601    if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
 602       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
 603       ctx->Const.ViewportSubpixelBits = 0;
 604
 605       /* Cast to float before negating because MaxViewportWidth is unsigned.
 606        */
 607       ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
 608       ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
 609    }
 610
 611    /* ARB_gpu_shader5 */
 612    if (brw->gen >= 7)
 613       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
 614 }
 615
 616 static void
 617 brw_adjust_cs_context_constants(struct brw_context *brw)
 618 {
 619    struct gl_context *ctx = &brw->ctx;
 620
 621    /* For ES, we set these constants based on SIMD8.
 622     *
 623     * TODO: Once we can always generate SIMD16, we should update this.
 624     *
 625     * For GL, we assume we can generate a SIMD16 program, but this currently
 626     * is not always true. This allows us to run more test cases, and will be
 627     * required based on desktop GL compute shader requirements.
 628     */
 629    const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
 630
 631    const uint32_t max_invocations = simd_size * brw->max_cs_threads;
 632    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
 633    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
 634    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
 635    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
 636 }
 637
 638 /**
 639  * Process driconf (drirc) options, setting appropriate context flags.
 640  *
 641  * intelInitExtensions still pokes at optionCache directly, in order to
 642  * avoid advertising various extensions.  No flags are set, so it makes
 643  * sense to continue doing that there.
 644  */
 645 static void
 646 brw_process_driconf_options(struct brw_context *brw)
 647 {
 648    struct gl_context *ctx = &brw->ctx;
 649
 650    driOptionCache *options = &brw->optionCache;
 651    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 652                        brw->driContext->driScreenPriv->myNum, "i965");
 653
 654    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 655    switch (bo_reuse_mode) {
 656    case DRI_CONF_BO_REUSE_DISABLED:
 657       break;
 658    case DRI_CONF_BO_REUSE_ALL:
 659       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 660       break;
 661    }
 662
 663    if (!driQueryOptionb(options, "hiz")) {
 664        brw->has_hiz = false;
 665        /* On gen6, you can only do separate stencil with HIZ. */
 666        if (brw->gen == 6)
 667           brw->has_separate_stencil = false;
 668    }
 669
 670    if (driQueryOptionb(options, "always_flush_batch")) {
 671       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 672       brw->always_flush_batch = true;
 673    }
 674
 675    if (driQueryOptionb(options, "always_flush_cache")) {
 676       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 677       brw->always_flush_cache = true;
 678    }
 679
 680    if (driQueryOptionb(options, "disable_throttling")) {
 681       fprintf(stderr, "disabling flush throttling\n");
 682       brw->disable_throttling = true;
 683    }
 684
 685    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 686
 687    ctx->Const.ForceGLSLExtensionsWarn =
 688       driQueryOptionb(options, "force_glsl_extensions_warn");
 689
 690    ctx->Const.DisableGLSLLineContinuations =
 691       driQueryOptionb(options, "disable_glsl_line_continuations");
 692
 693    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 694       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 695 }
 696
 697 GLboolean
 698 brwCreateContext(gl_api api,
 699                  const struct gl_config *mesaVis,
 700                  __DRIcontext *driContextPriv,
 701                  unsigned major_version,
 702                  unsigned minor_version,
 703                  uint32_t flags,
 704                  bool notify_reset,
 705                  unsigned *dri_ctx_error,
 706                  void *sharedContextPrivate)
 707 {
 708    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 709    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 710    struct intel_screen *screen = sPriv->driverPrivate;
 711    const struct brw_device_info *devinfo = screen->devinfo;
 712    struct dd_function_table functions;
 713
 714    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 715     * provides us with context reset notifications.
 716     */
 717    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 718       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 719
 720    if (screen->has_context_reset_notification)
 721       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 722
 723    if (flags & ~allowed_flags) {
 724       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 725       return false;
 726    }
 727
 728    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 729    if (!brw) {
 730       fprintf(stderr, "%s: failed to alloc context\n", __func__);
 731       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 732       return false;
 733    }
 734
 735    driContextPriv->driverPrivate = brw;
 736    brw->driContext = driContextPriv;
 737    brw->intelScreen = screen;
 738    brw->bufmgr = screen->bufmgr;
 739
 740    brw->gen = devinfo->gen;
 741    brw->gt = devinfo->gt;
 742    brw->is_g4x = devinfo->is_g4x;
 743    brw->is_baytrail = devinfo->is_baytrail;
 744    brw->is_haswell = devinfo->is_haswell;
 745    brw->is_cherryview = devinfo->is_cherryview;
 746    brw->has_llc = devinfo->has_llc;
 747    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 748    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 749    brw->has_pln = devinfo->has_pln;
 750    brw->has_compr4 = devinfo->has_compr4;
 751    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 752    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 753    brw->needs_unlit_centroid_workaround =
 754       devinfo->needs_unlit_centroid_workaround;
 755
 756    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 757    brw->has_swizzling = screen->hw_has_swizzling;
 758
 759    brw->vs.base.stage = MESA_SHADER_VERTEX;
 760    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 761    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 762    if (brw->gen >= 8) {
 763       gen8_init_vtable_surface_functions(brw);
 764       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 765    } else if (brw->gen >= 7) {
 766       gen7_init_vtable_surface_functions(brw);
 767       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 768    } else if (brw->gen >= 6) {
 769       gen6_init_vtable_surface_functions(brw);
 770       brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
 771    } else {
 772       gen4_init_vtable_surface_functions(brw);
 773       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 774    }
 775
 776    brw_init_driver_functions(brw, &functions);
 777
 778    if (notify_reset)
 779       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 780
 781    struct gl_context *ctx = &brw->ctx;
 782
 783    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 784       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 785       fprintf(stderr, "%s: failed to init mesa context\n", __func__);
 786       intelDestroyContext(driContextPriv);
 787       return false;
 788    }
 789
 790    driContextSetFlags(ctx, flags);
 791
 792    /* Initialize the software rasterizer and helper modules.
 793     *
 794     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 795     * software fallbacks (which we have to support on legacy GL to do weird
 796     * glDrawPixels(), glBitmap(), and other functions).
 797     */
 798    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 799       _swrast_CreateContext(ctx);
 800    }
 801
 802    _vbo_CreateContext(ctx);
 803    if (ctx->swrast_context) {
 804       _tnl_CreateContext(ctx);
 805       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 806       _swsetup_CreateContext(ctx);
 807
 808       /* Configure swrast to match hardware characteristics: */
 809       _swrast_allow_pixel_fog(ctx, false);
 810       _swrast_allow_vertex_fog(ctx, true);
 811    }
 812
 813    _mesa_meta_init(ctx);
 814
 815    brw_process_driconf_options(brw);
 816    brw_process_intel_debug_variable(brw);
 817
 818    if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
 819       brw->scalar_vs = true;
 820
 821    brw_initialize_context_constants(brw);
 822
 823    ctx->Const.ResetStrategy = notify_reset
 824       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 825
 826    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 827    _mesa_init_point(ctx);
 828
 829    intel_fbo_init(brw);
 830
 831    intel_batchbuffer_init(brw);
 832
 833    if (brw->gen >= 6) {
 834       /* Create a new hardware context.  Using a hardware context means that
 835        * our GPU state will be saved/restored on context switch, allowing us
 836        * to assume that the GPU is in the same state we left it in.
 837        *
 838        * This is required for transform feedback buffer offsets, query objects,
 839        * and also allows us to reduce how much state we have to emit.
 840        */
 841       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 842
 843       if (!brw->hw_ctx) {
 844          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 845          intelDestroyContext(driContextPriv);
 846          return false;
 847       }
 848    }
 849
 850    brw_init_state(brw);
 851
 852    intelInitExtensions(ctx);
 853
 854    brw_init_surface_formats(brw);
 855
 856    brw->max_vs_threads = devinfo->max_vs_threads;
 857    brw->max_hs_threads = devinfo->max_hs_threads;
 858    brw->max_ds_threads = devinfo->max_ds_threads;
 859    brw->max_gs_threads = devinfo->max_gs_threads;
 860    brw->max_wm_threads = devinfo->max_wm_threads;
 861    brw->max_cs_threads = devinfo->max_cs_threads;
 862    brw->urb.size = devinfo->urb.size;
 863    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 864    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 865    brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
 866    brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
 867    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 868
 869    brw_adjust_cs_context_constants(brw);
 870
 871    /* Estimate the size of the mappable aperture into the GTT.  There's an
 872     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 873     * It turns out it's basically always 256MB, though some ancient hardware
 874     * was smaller.
 875     */
 876    uint32_t gtt_size = 256 * 1024 * 1024;
 877
 878    /* We don't want to map two objects such that a memcpy between them would
 879     * just fault one mapping in and then the other over and over forever.  So
 880     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 881     * taken up by things like the framebuffer and the ringbuffer and such, so
 882     * be more conservative.
 883     */
 884    brw->max_gtt_map_object_size = gtt_size / 4;
 885
 886    if (brw->gen == 6)
 887       brw->urb.gs_present = false;
 888
 889    brw->prim_restart.in_progress = false;
 890    brw->prim_restart.enable_cut_index = false;
 891    brw->gs.enabled = false;
 892    brw->sf.viewport_transform_enable = true;
 893
 894    ctx->VertexProgram._MaintainTnlProgram = true;
 895    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 896
 897    brw_draw_init( brw );
 898
 899    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 900       /* Turn on some extra GL_ARB_debug_output generation. */
 901       brw->perf_debug = true;
 902    }
 903
 904    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 905       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 906
 907    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 908       brw_init_shader_time(brw);
 909
 910    _mesa_compute_version(ctx);
 911
 912    _mesa_initialize_dispatch_tables(ctx);
 913    _mesa_initialize_vbo_vtxfmt(ctx);
 914
 915    if (ctx->Extensions.AMD_performance_monitor) {
 916       brw_init_performance_monitors(brw);
 917    }
 918
 919    vbo_use_buffer_objects(ctx);
 920    vbo_always_unmap_buffers(ctx);
 921
 922    return true;
 923 }
 924
 925 void
 926 intelDestroyContext(__DRIcontext * driContextPriv)
 927 {
 928    struct brw_context *brw =
 929       (struct brw_context *) driContextPriv->driverPrivate;
 930    struct gl_context *ctx = &brw->ctx;
 931
 932    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 933    if (INTEL_DEBUG & DEBUG_AUB) {
 934       intel_batchbuffer_flush(brw);
 935       aub_dump_bmp(&brw->ctx);
 936    }
 937
 938    _mesa_meta_free(&brw->ctx);
 939    brw_meta_fast_clear_free(brw);
 940
 941    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 942       /* Force a report. */
 943       brw->shader_time.report_time = 0;
 944
 945       brw_collect_and_report_shader_time(brw);
 946       brw_destroy_shader_time(brw);
 947    }
 948
 949    brw_destroy_state(brw);
 950    brw_draw_destroy(brw);
 951
 952    drm_intel_bo_unreference(brw->curbe.curbe_bo);
 953    if (brw->vs.base.scratch_bo)
 954       drm_intel_bo_unreference(brw->vs.base.scratch_bo);
 955    if (brw->gs.base.scratch_bo)
 956       drm_intel_bo_unreference(brw->gs.base.scratch_bo);
 957    if (brw->wm.base.scratch_bo)
 958       drm_intel_bo_unreference(brw->wm.base.scratch_bo);
 959
 960    drm_intel_gem_context_destroy(brw->hw_ctx);
 961
 962    if (ctx->swrast_context) {
 963       _swsetup_DestroyContext(&brw->ctx);
 964       _tnl_DestroyContext(&brw->ctx);
 965    }
 966    _vbo_DestroyContext(&brw->ctx);
 967
 968    if (ctx->swrast_context)
 969       _swrast_DestroyContext(&brw->ctx);
 970
 971    intel_batchbuffer_free(brw);
 972
 973    drm_intel_bo_unreference(brw->throttle_batch[1]);
 974    drm_intel_bo_unreference(brw->throttle_batch[0]);
 975    brw->throttle_batch[1] = NULL;
 976    brw->throttle_batch[0] = NULL;
 977
 978    driDestroyOptionCache(&brw->optionCache);
 979
 980    /* free the Mesa context */
 981    _mesa_free_context_data(&brw->ctx);
 982
 983    ralloc_free(brw);
 984    driContextPriv->driverPrivate = NULL;
 985 }
 986
 987 GLboolean
 988 intelUnbindContext(__DRIcontext * driContextPriv)
 989 {
 990    /* Unset current context and dispath table */
 991    _mesa_make_current(NULL, NULL, NULL);
 992
 993    return true;
 994 }
 995
 996 /**
 997  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 998  * on window system framebuffers.
 999  *
1000  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1001  * your renderbuffer can do sRGB encode, and you can flip a switch that does
1002  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
1003  * for a visual where you're guaranteed to be capable, but it turns out that
1004  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1005  * incapable ones, because there's no difference between the two in resources
1006  * used.  Applications thus get built that accidentally rely on the default
1007  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
1008  * great...
1009  *
1010  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1011  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1012  * So they removed the enable knob and made it "if the renderbuffer is sRGB
1013  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
1014  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1015  * and get no sRGB encode (assuming that both kinds of visual are available).
1016  * Thus our choice to support sRGB by default on our visuals for desktop would
1017  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1018  *
1019  * Unfortunately, renderbuffer setup happens before a context is created.  So
1020  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1021  * context (without an sRGB visual, though we don't have sRGB visuals exposed
1022  * yet), we go turn that back off before anyone finds out.
1023  */
1024 static void
1025 intel_gles3_srgb_workaround(struct brw_context *brw,
1026                             struct gl_framebuffer *fb)
1027 {
1028    struct gl_context *ctx = &brw->ctx;
1029
1030    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1031       return;
1032
1033    /* Some day when we support the sRGB capable bit on visuals available for
1034     * GLES, we'll need to respect that and not disable things here.
1035     */
1036    fb->Visual.sRGBCapable = false;
1037    for (int i = 0; i < BUFFER_COUNT; i++) {
1038       if (fb->Attachment[i].Renderbuffer &&
1039           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
1040          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
1041       }
1042    }
1043 }
1044
1045 GLboolean
1046 intelMakeCurrent(__DRIcontext * driContextPriv,
1047                  __DRIdrawable * driDrawPriv,
1048                  __DRIdrawable * driReadPriv)
1049 {
1050    struct brw_context *brw;
1051    GET_CURRENT_CONTEXT(curCtx);
1052
1053    if (driContextPriv)
1054       brw = (struct brw_context *) driContextPriv->driverPrivate;
1055    else
1056       brw = NULL;
1057
1058    /* According to the glXMakeCurrent() man page: "Pending commands to
1059     * the previous context, if any, are flushed before it is released."
1060     * But only flush if we're actually changing contexts.
1061     */
1062    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
1063       _mesa_flush(curCtx);
1064    }
1065
1066    if (driContextPriv) {
1067       struct gl_context *ctx = &brw->ctx;
1068       struct gl_framebuffer *fb, *readFb;
1069
1070       if (driDrawPriv == NULL) {
1071          fb = _mesa_get_incomplete_framebuffer();
1072       } else {
1073          fb = driDrawPriv->driverPrivate;
1074          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1075       }
1076
1077       if (driReadPriv == NULL) {
1078          readFb = _mesa_get_incomplete_framebuffer();
1079       } else {
1080          readFb = driReadPriv->driverPrivate;
1081          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1082       }
1083
1084       /* The sRGB workaround changes the renderbuffer's format. We must change
1085        * the format before the renderbuffer's miptree get's allocated, otherwise
1086        * the formats of the renderbuffer and its miptree will differ.
1087        */
1088       intel_gles3_srgb_workaround(brw, fb);
1089       intel_gles3_srgb_workaround(brw, readFb);
1090
1091       /* If the context viewport hasn't been initialized, force a call out to
1092        * the loader to get buffers so we have a drawable size for the initial
1093        * viewport. */
1094       if (!brw->ctx.ViewportInitialized)
1095          intel_prepare_render(brw);
1096
1097       _mesa_make_current(ctx, fb, readFb);
1098    } else {
1099       _mesa_make_current(NULL, NULL, NULL);
1100    }
1101
1102    return true;
1103 }
1104
1105 void
1106 intel_resolve_for_dri2_flush(struct brw_context *brw,
1107                              __DRIdrawable *drawable)
1108 {
1109    if (brw->gen < 6) {
1110       /* MSAA and fast color clear are not supported, so don't waste time
1111        * checking whether a resolve is needed.
1112        */
1113       return;
1114    }
1115
1116    struct gl_framebuffer *fb = drawable->driverPrivate;
1117    struct intel_renderbuffer *rb;
1118
1119    /* Usually, only the back buffer will need to be downsampled. However,
1120     * the front buffer will also need it if the user has rendered into it.
1121     */
1122    static const gl_buffer_index buffers[2] = {
1123          BUFFER_BACK_LEFT,
1124          BUFFER_FRONT_LEFT,
1125    };
1126
1127    for (int i = 0; i < 2; ++i) {
1128       rb = intel_get_renderbuffer(fb, buffers[i]);
1129       if (rb == NULL || rb->mt == NULL)
1130          continue;
1131       if (rb->mt->num_samples <= 1)
1132          intel_miptree_resolve_color(brw, rb->mt);
1133       else
1134          intel_renderbuffer_downsample(brw, rb);
1135    }
1136 }
1137
/**
 * Return the size of one pixel of \p rb in bits, computed as eight times the
 * byte size reported by _mesa_get_format_bytes() for the renderbuffer format.
 */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return 8 * _mesa_get_format_bytes(intel_rb_format(rb));
}
1143
1144 static void
1145 intel_query_dri2_buffers(struct brw_context *brw,
1146                          __DRIdrawable *drawable,
1147                          __DRIbuffer **buffers,
1148                          int *count);
1149
1150 static void
1151 intel_process_dri2_buffer(struct brw_context *brw,
1152                           __DRIdrawable *drawable,
1153                           __DRIbuffer *buffer,
1154                           struct intel_renderbuffer *rb,
1155                           const char *buffer_name);
1156
1157 static void
1158 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1159
1160 static void
1161 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1162 {
1163    struct gl_framebuffer *fb = drawable->driverPrivate;
1164    struct intel_renderbuffer *rb;
1165    __DRIbuffer *buffers = NULL;
1166    int i, count;
1167    const char *region_name;
1168
1169    /* Set this up front, so that in case our buffers get invalidated
1170     * while we're getting new buffers, we don't clobber the stamp and
1171     * thus ignore the invalidate. */
1172    drawable->lastStamp = drawable->dri2.stamp;
1173
1174    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1175       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1176
1177    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1178
1179    if (buffers == NULL)
1180       return;
1181
1182    for (i = 0; i < count; i++) {
1183        switch (buffers[i].attachment) {
1184        case __DRI_BUFFER_FRONT_LEFT:
1185            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1186            region_name = "dri2 front buffer";
1187            break;
1188
1189        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1190            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1191            region_name = "dri2 fake front buffer";
1192            break;
1193
1194        case __DRI_BUFFER_BACK_LEFT:
1195            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1196            region_name = "dri2 back buffer";
1197            break;
1198
1199        case __DRI_BUFFER_DEPTH:
1200        case __DRI_BUFFER_HIZ:
1201        case __DRI_BUFFER_DEPTH_STENCIL:
1202        case __DRI_BUFFER_STENCIL:
1203        case __DRI_BUFFER_ACCUM:
1204        default:
1205            fprintf(stderr,
1206                    "unhandled buffer attach event, attachment type %d\n",
1207                    buffers[i].attachment);
1208            return;
1209        }
1210
1211        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1212    }
1213
1214 }
1215
1216 void
1217 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1218 {
1219    struct brw_context *brw = context->driverPrivate;
1220    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1221
1222    /* Set this up front, so that in case our buffers get invalidated
1223     * while we're getting new buffers, we don't clobber the stamp and
1224     * thus ignore the invalidate. */
1225    drawable->lastStamp = drawable->dri2.stamp;
1226
1227    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1228       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1229
1230    if (screen->image.loader)
1231       intel_update_image_buffers(brw, drawable);
1232    else
1233       intel_update_dri2_buffers(brw, drawable);
1234
1235    driUpdateFramebufferSize(&brw->ctx, drawable);
1236 }
1237
1238 /**
1239  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1240  * state is required.
1241  */
1242 void
1243 intel_prepare_render(struct brw_context *brw)
1244 {
1245    struct gl_context *ctx = &brw->ctx;
1246    __DRIcontext *driContext = brw->driContext;
1247    __DRIdrawable *drawable;
1248
1249    drawable = driContext->driDrawablePriv;
1250    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1251       if (drawable->lastStamp != drawable->dri2.stamp)
1252          intel_update_renderbuffers(driContext, drawable);
1253       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1254    }
1255
1256    drawable = driContext->driReadablePriv;
1257    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1258       if (drawable->lastStamp != drawable->dri2.stamp)
1259          intel_update_renderbuffers(driContext, drawable);
1260       driContext->dri2.read_stamp = drawable->dri2.stamp;
1261    }
1262
1263    /* If we're currently rendering to the front buffer, the rendering
1264     * that will happen next will probably dirty the front buffer.  So
1265     * mark it as dirty here.
1266     */
1267    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
1268       brw->front_buffer_dirty = true;
1269 }
1270
1271 /**
1272  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1273  *
1274  * To determine which DRI buffers to request, examine the renderbuffers
1275  * attached to the drawable's framebuffer. Then request the buffers with
1276  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1277  *
1278  * This is called from intel_update_renderbuffers().
1279  *
1280  * \param drawable      Drawable whose buffers are queried.
1281  * \param buffers       [out] List of buffers returned by DRI2 query.
1282  * \param buffer_count  [out] Number of buffers returned.
1283  *
1284  * \see intel_update_renderbuffers()
1285  * \see DRI2GetBuffers()
1286  * \see DRI2GetBuffersWithFormat()
1287  */
1288 static void
1289 intel_query_dri2_buffers(struct brw_context *brw,
1290                          __DRIdrawable *drawable,
1291                          __DRIbuffer **buffers,
1292                          int *buffer_count)
1293 {
1294    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1295    struct gl_framebuffer *fb = drawable->driverPrivate;
1296    int i = 0;
1297    unsigned attachments[8];
1298
1299    struct intel_renderbuffer *front_rb;
1300    struct intel_renderbuffer *back_rb;
1301
1302    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1303    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1304
1305    memset(attachments, 0, sizeof(attachments));
1306    if ((brw_is_front_buffer_drawing(fb) ||
1307         brw_is_front_buffer_reading(fb) ||
1308         !back_rb) && front_rb) {
1309       /* If a fake front buffer is in use, then querying for
1310        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1311        * the real front buffer to the fake front buffer.  So before doing the
1312        * query, we need to make sure all the pending drawing has landed in the
1313        * real front buffer.
1314        */
1315       intel_batchbuffer_flush(brw);
1316       intel_flush_front(&brw->ctx);
1317
1318       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1319       attachments[i++] = intel_bits_per_pixel(front_rb);
1320    } else if (front_rb && brw->front_buffer_dirty) {
1321       /* We have pending front buffer rendering, but we aren't querying for a
1322        * front buffer.  If the front buffer we have is a fake front buffer,
1323        * the X server is going to throw it away when it processes the query.
1324        * So before doing the query, make sure all the pending drawing has
1325        * landed in the real front buffer.
1326        */
1327       intel_batchbuffer_flush(brw);
1328       intel_flush_front(&brw->ctx);
1329    }
1330
1331    if (back_rb) {
1332       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1333       attachments[i++] = intel_bits_per_pixel(back_rb);
1334    }
1335
1336    assert(i <= ARRAY_SIZE(attachments));
1337
1338    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1339                                                         &drawable->w,
1340                                                         &drawable->h,
1341                                                         attachments, i / 2,
1342                                                         buffer_count,
1343                                                         drawable->loaderPrivate);
1344 }
1345
1346 /**
1347  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1348  *
1349  * This is called from intel_update_renderbuffers().
1350  *
1351  * \par Note:
1352  *    DRI buffers whose attachment point is DRI2BufferStencil or
1353  *    DRI2BufferDepthStencil are handled as special cases.
1354  *
1355  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1356  *        that is passed to drm_intel_bo_gem_create_from_name().
1357  *
1358  * \see intel_update_renderbuffers()
1359  */
1360 static void
1361 intel_process_dri2_buffer(struct brw_context *brw,
1362                           __DRIdrawable *drawable,
1363                           __DRIbuffer *buffer,
1364                           struct intel_renderbuffer *rb,
1365                           const char *buffer_name)
1366 {
1367    struct gl_framebuffer *fb = drawable->driverPrivate;
1368    drm_intel_bo *bo;
1369
1370    if (!rb)
1371       return;
1372
1373    unsigned num_samples = rb->Base.Base.NumSamples;
1374
1375    /* We try to avoid closing and reopening the same BO name, because the first
1376     * use of a mapping of the buffer involves a bunch of page faulting which is
1377     * moderately expensive.
1378     */
1379    struct intel_mipmap_tree *last_mt;
1380    if (num_samples == 0)
1381       last_mt = rb->mt;
1382    else
1383       last_mt = rb->singlesample_mt;
1384
1385    uint32_t old_name = 0;
1386    if (last_mt) {
1387        /* The bo already has a name because the miptree was created by a
1388         * previous call to intel_process_dri2_buffer(). If a bo already has a
1389         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1390         * create a new name.
1391         */
1392       drm_intel_bo_flink(last_mt->bo, &old_name);
1393    }
1394
1395    if (old_name == buffer->name)
1396       return;
1397
1398    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1399       fprintf(stderr,
1400               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1401               buffer->name, buffer->attachment,
1402               buffer->cpp, buffer->pitch);
1403    }
1404
1405    intel_miptree_release(&rb->mt);
1406    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1407                                           buffer->name);
1408    if (!bo) {
1409       fprintf(stderr,
1410               "Failed to open BO for returned DRI2 buffer "
1411               "(%dx%d, %s, named %d).\n"
1412               "This is likely a bug in the X Server that will lead to a "
1413               "crash soon.\n",
1414               drawable->w, drawable->h, buffer_name, buffer->name);
1415       return;
1416    }
1417
1418    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1419                                             drawable->w, drawable->h,
1420                                             buffer->pitch);
1421
1422    if (brw_is_front_buffer_drawing(fb) &&
1423        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1424         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1425        rb->Base.Base.NumSamples > 1) {
1426       intel_renderbuffer_upsample(brw, rb);
1427    }
1428
1429    assert(rb->mt);
1430
1431    drm_intel_bo_unreference(bo);
1432 }
1433
1434 /**
1435  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1436  *
1437  * To determine which DRI buffers to request, examine the renderbuffers
1438  * attached to the drawable's framebuffer. Then request the buffers from
1439  * the image loader
1440  *
1441  * This is called from intel_update_renderbuffers().
1442  *
1443  * \param drawable      Drawable whose buffers are queried.
1444  * \param buffers       [out] List of buffers returned by DRI2 query.
1445  * \param buffer_count  [out] Number of buffers returned.
1446  *
1447  * \see intel_update_renderbuffers()
1448  */
1449
1450 static void
1451 intel_update_image_buffer(struct brw_context *intel,
1452                           __DRIdrawable *drawable,
1453                           struct intel_renderbuffer *rb,
1454                           __DRIimage *buffer,
1455                           enum __DRIimageBufferMask buffer_type)
1456 {
1457    struct gl_framebuffer *fb = drawable->driverPrivate;
1458
1459    if (!rb || !buffer->bo)
1460       return;
1461
1462    unsigned num_samples = rb->Base.Base.NumSamples;
1463
1464    /* Check and see if we're already bound to the right
1465     * buffer object
1466     */
1467    struct intel_mipmap_tree *last_mt;
1468    if (num_samples == 0)
1469       last_mt = rb->mt;
1470    else
1471       last_mt = rb->singlesample_mt;
1472
1473    if (last_mt && last_mt->bo == buffer->bo)
1474       return;
1475
1476    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1477                                             buffer->width, buffer->height,
1478                                             buffer->pitch);
1479
1480    if (brw_is_front_buffer_drawing(fb) &&
1481        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1482        rb->Base.Base.NumSamples > 1) {
1483       intel_renderbuffer_upsample(intel, rb);
1484    }
1485 }
1486
1487 static void
1488 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1489 {
1490    struct gl_framebuffer *fb = drawable->driverPrivate;
1491    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1492    struct intel_renderbuffer *front_rb;
1493    struct intel_renderbuffer *back_rb;
1494    struct __DRIimageList images;
1495    unsigned int format;
1496    uint32_t buffer_mask = 0;
1497
1498    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1499    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1500
1501    if (back_rb)
1502       format = intel_rb_format(back_rb);
1503    else if (front_rb)
1504       format = intel_rb_format(front_rb);
1505    else
1506       return;
1507
1508    if (front_rb && (brw_is_front_buffer_drawing(fb) ||
1509                     brw_is_front_buffer_reading(fb) || !back_rb)) {
1510       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1511    }
1512
1513    if (back_rb)
1514       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1515
1516    (*screen->image.loader->getBuffers) (drawable,
1517                                         driGLFormatToImageFormat(format),
1518                                         &drawable->dri2.stamp,
1519                                         drawable->loaderPrivate,
1520                                         buffer_mask,
1521                                         &images);
1522
1523    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1524       drawable->w = images.front->width;
1525       drawable->h = images.front->height;
1526       intel_update_image_buffer(brw,
1527                                 drawable,
1528                                 front_rb,
1529                                 images.front,
1530                                 __DRI_IMAGE_BUFFER_FRONT);
1531    }
1532    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1533       drawable->w = images.back->width;
1534       drawable->h = images.back->height;
1535       intel_update_image_buffer(brw,
1536                                 drawable,
1537                                 back_rb,
1538                                 images.back,
1539                                 __DRI_IMAGE_BUFFER_BACK);
1540    }
1541 }