src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43 #include "main/texobj.h"
  44
  45 #include "vbo/vbo_context.h"
  46
  47 #include "drivers/common/driverfuncs.h"
  48 #include "drivers/common/meta.h"
  49 #include "utils.h"
  50
  51 #include "brw_context.h"
  52 #include "brw_defines.h"
  53 #include "brw_draw.h"
  54 #include "brw_state.h"
  55
  56 #include "intel_batchbuffer.h"
  57 #include "intel_buffer_objects.h"
  58 #include "intel_buffers.h"
  59 #include "intel_fbo.h"
  60 #include "intel_mipmap_tree.h"
  61 #include "intel_pixel.h"
  62 #include "intel_image.h"
  63 #include "intel_tex.h"
  64 #include "intel_tex_obj.h"
  65
  66 #include "swrast_setup/swrast_setup.h"
  67 #include "tnl/tnl.h"
  68 #include "tnl/t_pipeline.h"
  69 #include "util/ralloc.h"
  70
  71 /***************************************
  72  * Mesa's Driver Functions
  73  ***************************************/
  74
  75 static size_t
  76 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  77                              GLenum internalFormat, int samples[16])
  78 {
  79    struct brw_context *brw = brw_context(ctx);
  80
  81    (void) target;
  82
  83    switch (brw->gen) {
  84    case 8:
  85       samples[0] = 8;
  86       samples[1] = 4;
  87       samples[2] = 2;
  88       return 3;
  89
  90    case 7:
  91       samples[0] = 8;
  92       samples[1] = 4;
  93       return 2;
  94
  95    case 6:
  96       samples[0] = 4;
  97       return 1;
  98
  99    default:
 100       samples[0] = 1;
 101       return 1;
 102    }
 103 }
 104
 105 const char *const brw_vendor_string = "Intel Open Source Technology Center";
 106
 107 const char *
 108 brw_get_renderer_string(unsigned deviceID)
 109 {
 110    const char *chipset;
 111    static char buffer[128];
 112
 113    switch (deviceID) {
 114 #undef CHIPSET
 115 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
 116 #include "pci_ids/i965_pci_ids.h"
 117    default:
 118       chipset = "Unknown Intel Chipset";
 119       break;
 120    }
 121
 122    (void) driGetRendererString(buffer, chipset, 0);
 123    return buffer;
 124 }
 125
 126 static const GLubyte *
 127 intel_get_string(struct gl_context * ctx, GLenum name)
 128 {
 129    const struct brw_context *const brw = brw_context(ctx);
 130
 131    switch (name) {
 132    case GL_VENDOR:
 133       return (GLubyte *) brw_vendor_string;
 134
 135    case GL_RENDERER:
 136       return
 137          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 138
 139    default:
 140       return NULL;
 141    }
 142 }
 143
 144 static void
 145 intel_viewport(struct gl_context *ctx)
 146 {
 147    struct brw_context *brw = brw_context(ctx);
 148    __DRIcontext *driContext = brw->driContext;
 149
 150    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 151       dri2InvalidateDrawable(driContext->driDrawablePriv);
 152       dri2InvalidateDrawable(driContext->driReadablePriv);
 153    }
 154 }
 155
 156 static void
 157 intel_update_state(struct gl_context * ctx, GLuint new_state)
 158 {
 159    struct brw_context *brw = brw_context(ctx);
 160    struct intel_texture_object *tex_obj;
 161    struct intel_renderbuffer *depth_irb;
 162
 163    if (ctx->swrast_context)
 164       _swrast_InvalidateState(ctx, new_state);
 165    _vbo_InvalidateState(ctx, new_state);
 166
 167    brw->NewGLState |= new_state;
 168
 169    _mesa_unlock_context_textures(ctx);
 170
 171    /* Resolve the depth buffer's HiZ buffer. */
 172    depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
 173    if (depth_irb)
 174       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 175
 176    /* Resolve depth buffer and render cache of each enabled texture. */
 177    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
 178    for (int i = 0; i <= maxEnabledUnit; i++) {
 179       if (!ctx->Texture.Unit[i]._Current)
 180          continue;
 181       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
 182       if (!tex_obj || !tex_obj->mt)
 183          continue;
 184       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
 185       intel_miptree_resolve_color(brw, tex_obj->mt);
 186       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 187    }
 188
 189    _mesa_lock_context_textures(ctx);
 190 }
 191
 192 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 193
 194 static void
 195 intel_flush_front(struct gl_context *ctx)
 196 {
 197    struct brw_context *brw = brw_context(ctx);
 198    __DRIcontext *driContext = brw->driContext;
 199    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 200    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 201
 202    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 203       if (flushFront(screen) && driDrawable &&
 204           driDrawable->loaderPrivate) {
 205
 206          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 207           *
 208           * This potentially resolves both front and back buffer. It
 209           * is unnecessary to resolve the back, but harms nothing except
 210           * performance. And no one cares about front-buffer render
 211           * performance.
 212           */
 213          intel_resolve_for_dri2_flush(brw, driDrawable);
 214          intel_batchbuffer_flush(brw);
 215
 216          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 217
 218          /* We set the dirty bit in intel_prepare_render() if we're
 219           * front buffer rendering once we get there.
 220           */
 221          brw->front_buffer_dirty = false;
 222       }
 223    }
 224 }
 225
 226 static void
 227 intel_glFlush(struct gl_context *ctx)
 228 {
 229    struct brw_context *brw = brw_context(ctx);
 230
 231    intel_batchbuffer_flush(brw);
 232    intel_flush_front(ctx);
 233    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
 234       brw->need_throttle = true;
 235 }
 236
 237 static void
 238 intel_finish(struct gl_context * ctx)
 239 {
 240    struct brw_context *brw = brw_context(ctx);
 241
 242    intel_glFlush(ctx);
 243
 244    if (brw->batch.last_bo)
 245       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 246 }
 247
 248 static void
 249 brw_init_driver_functions(struct brw_context *brw,
 250                           struct dd_function_table *functions)
 251 {
 252    _mesa_init_driver_functions(functions);
 253
 254    /* GLX uses DRI2 invalidate events to handle window resizing.
 255     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 256     * which doesn't provide a mechanism for snooping the event queues.
 257     *
 258     * So EGL still relies on viewport hacks to handle window resizing.
 259     * This should go away with DRI3000.
 260     */
 261    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 262       functions->Viewport = intel_viewport;
 263
 264    functions->Flush = intel_glFlush;
 265    functions->Finish = intel_finish;
 266    functions->GetString = intel_get_string;
 267    functions->UpdateState = intel_update_state;
 268
 269    intelInitTextureFuncs(functions);
 270    intelInitTextureImageFuncs(functions);
 271    intelInitTextureSubImageFuncs(functions);
 272    intelInitTextureCopyImageFuncs(functions);
 273    intelInitCopyImageFuncs(functions);
 274    intelInitClearFuncs(functions);
 275    intelInitBufferFuncs(functions);
 276    intelInitPixelFuncs(functions);
 277    intelInitBufferObjectFuncs(functions);
 278    intel_init_syncobj_functions(functions);
 279    brw_init_object_purgeable_functions(functions);
 280
 281    brwInitFragProgFuncs( functions );
 282    brw_init_common_queryobj_functions(functions);
 283    if (brw->gen >= 6)
 284       gen6_init_queryobj_functions(functions);
 285    else
 286       gen4_init_queryobj_functions(functions);
 287
 288    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 289
 290    functions->NewTransformFeedback = brw_new_transform_feedback;
 291    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 292    functions->GetTransformFeedbackVertexCount =
 293       brw_get_transform_feedback_vertex_count;
 294    if (brw->gen >= 7) {
 295       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 296       functions->EndTransformFeedback = gen7_end_transform_feedback;
 297       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 298       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 299    } else {
 300       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 301       functions->EndTransformFeedback = brw_end_transform_feedback;
 302    }
 303
 304    if (brw->gen >= 6)
 305       functions->GetSamplePosition = gen6_get_sample_position;
 306 }
 307
      /**
       * Fill in the implementation limits and shader compiler options stored
       * in brw->ctx.Const.
       *
       * The values are selected from brw->gen, brw->is_haswell and brw->is_g4x,
       * from the MSAA mode list returned by intel_supported_msaa_modes(), from
       * the "clamp_max_samples" option in brw->optionCache, and from
       * _mesa_extension_override_enables.ARB_compute_shader.  The function also
       * calls gen6_set_sample_maps() on the context.
       *
       * Several assignments read fields written earlier in this function (for
       * example MaxTextureUnits and MaxCombinedTextureImageUnits), so the
       * statement order matters.
       */
  308 static void
  309 brw_initialize_context_constants(struct brw_context *brw)
  310 {
  311    struct gl_context *ctx = &brw->ctx;
  312
         /* Per-stage sampler limit: BRW_MAX_TEX_UNIT on Gen8+ and Haswell,
          * 16 on everything else.
          */
  313    unsigned max_samplers =
  314       brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
  315
  316    ctx->Const.QueryCounterBits.Timestamp = 36;
  317
  318    ctx->Const.StripTextureBorder = true;
  319
  320    ctx->Const.MaxDualSourceDrawBuffers = 1;
  321    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
  322    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
  323    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
  324    ctx->Const.MaxTextureUnits =
  325       MIN2(ctx->Const.MaxTextureCoordUnits,
  326            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
  327    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
         /* Geometry-stage samplers are only advertised on Gen6+. */
  328    if (brw->gen >= 6)
  329       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
  330    else
  331       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
         /* Compute-stage samplers are only advertised when the
          * ARB_compute_shader extension override is set; the same condition
          * raises MaxUniformBufferBindings by 12.
          * NOTE(review): the reason for the value 12 is not visible here.
          */
  332    if (_mesa_extension_override_enables.ARB_compute_shader) {
  333       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
  334       ctx->Const.MaxUniformBufferBindings += 12;
  335    } else {
  336       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
  337    }
         /* Sum of the four per-stage sampler limits assigned above. */
  338    ctx->Const.MaxCombinedTextureImageUnits =
  339       ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
  340       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
  341       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
  342       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
  343
  344    ctx->Const.MaxTextureLevels = 14; /* 8192 */
         /* Clamp to MAX_TEXTURE_LEVELS. */
  345    if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
  346       ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
  347    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
  348    ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
  349    ctx->Const.MaxTextureMbytes = 1536;
  350
  351    if (brw->gen >= 7)
  352       ctx->Const.MaxArrayTextureLayers = 2048;
  353    else
  354       ctx->Const.MaxArrayTextureLayers = 512;
  355
  356    ctx->Const.MaxTextureRectSize = 1 << 12;
  357
  358    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
  359
  360    ctx->Const.MaxRenderbufferSize = 8192;
  361
  362    /* Hardware only supports a limited number of transform feedback buffers.
  363     * So we need to override the Mesa default (which is based only on software
  364     * limits).
  365     */
  366    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
  367
  368    /* On Gen6, in the worst case, we use up one binding table entry per
  369     * transform feedback component (see comments above the definition of
  370     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
  371     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
  372     * BRW_MAX_SOL_BINDINGS.
  373     *
  374     * In "separate components" mode, we need to divide this value by
  375     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
  376     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
  377     */
  378    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
  379    ctx->Const.MaxTransformFeedbackSeparateComponents =
  380       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
  381
  382    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
  383
         /* Choose the advertised maximum sample count.  msaa_modes is walked
          * until a 0 entry.  A negative clamp_max_samples means "no clamp" and
          * takes the first entry; otherwise the first entry that fits is taken,
          * or 0 if none fits.
          * NOTE(review): using the first entry / first match as the largest
          * mode assumes the list is sorted in descending order -- confirm
          * against intel_supported_msaa_modes().
          */
  384    int max_samples;
  385    const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
  386    const int clamp_max_samples =
  387       driQueryOptioni(&brw->optionCache, "clamp_max_samples");
  388
  389    if (clamp_max_samples < 0) {
  390       max_samples = msaa_modes[0];
  391    } else {
  392       /* Select the largest supported MSAA mode that does not exceed
  393        * clamp_max_samples.
  394        */
  395       max_samples = 0;
  396       for (int i = 0; msaa_modes[i] != 0; ++i) {
  397          if (msaa_modes[i] <= clamp_max_samples) {
  398             max_samples = msaa_modes[i];
  399             break;
  400          }
  401       }
  402    }
  403
  404    ctx->Const.MaxSamples = max_samples;
  405    ctx->Const.MaxColorTextureSamples = max_samples;
  406    ctx->Const.MaxDepthTextureSamples = max_samples;
  407    ctx->Const.MaxIntegerSamples = max_samples;
  408
  409    /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
  410     * to map indices of rectangular grid to sample numbers within a pixel.
  411     * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
  412     * extension implementation. For more details see the comment above
  413     * gen6_set_sample_maps() definition.
  414     */
  415    gen6_set_sample_maps(ctx);
  416
         /* Before Gen6, MaxProgramTextureGatherComponents is left at whatever
          * value it already holds.
          */
  417    if (brw->gen >= 7)
  418       ctx->Const.MaxProgramTextureGatherComponents = 4;
  419    else if (brw->gen == 6)
  420       ctx->Const.MaxProgramTextureGatherComponents = 1;
  421
  422    ctx->Const.MinLineWidth = 1.0;
  423    ctx->Const.MinLineWidthAA = 1.0;
  424    if (brw->gen >= 6) {
  425       ctx->Const.MaxLineWidth = 7.875;
  426       ctx->Const.MaxLineWidthAA = 7.875;
  427       ctx->Const.LineWidthGranularity = 0.125;
  428    } else {
  429       ctx->Const.MaxLineWidth = 7.0;
  430       ctx->Const.MaxLineWidthAA = 7.0;
  431       ctx->Const.LineWidthGranularity = 0.5;
  432    }
  433
  434    ctx->Const.MinPointSize = 1.0;
  435    ctx->Const.MinPointSizeAA = 1.0;
  436    ctx->Const.MaxPointSize = 255.0;
  437    ctx->Const.MaxPointSizeAA = 255.0;
  438    ctx->Const.PointSizeGranularity = 1.0;
  439
         /* Other parts keep the MaxClipPlanes value that is already set. */
  440    if (brw->gen >= 5 || brw->is_g4x)
  441       ctx->Const.MaxClipPlanes = 8;
  442
  443    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
  444    ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
  445    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
  446    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
  447    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
  448    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
  449    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
  450    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
  451    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
  452    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
  453    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
         /* Clamp the previously set MaxEnvParams to MaxNativeParameters. */
  454    ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
  455       MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
  456            ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
  457
  458    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
  459    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
  460    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
  461    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
  462    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
  463    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
  464    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
  465    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
         /* Same clamp as for the vertex stage. */
  466    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
  467       MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
  468            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
  469
  470    /* Fragment shaders use real, 32-bit twos-complement integers for all
  471     * integer types.
  472     */
  473    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
  474    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
  475    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
  476    ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
  477    ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
  478
         /* Atomic counter limits are only set on Gen7+.
          * NOTE(review): the combined buffer limit is 3 * BRW_MAX_ABO although
          * four stages are given BRW_MAX_ABO each -- confirm this is intended.
          */
  479    if (brw->gen >= 7) {
  480       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
  481       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
  482       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
  483       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
  484       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
  485       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
  486       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
  487       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
  488       ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
  489    }
  490
  491    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
  492     * but we're not sure how it's actually done for vertex order,
  493     * that affect provoking vertex decision. Always use last vertex
  494     * convention for quad primitive which works as expected for now.
  495     */
  496    if (brw->gen >= 6)
  497       ctx->Const.QuadsFollowProvokingVertexConvention = false;
  498
  499    ctx->Const.NativeIntegers = true;
  500    ctx->Const.VertexID_is_zero_based = true;
  501
  502    /* Regarding the CMP instruction, the Ivybridge PRM says:
  503     *
  504     *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
  505     *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
  506     *    0xFFFFFFFF) is assigned to dst."
  507     *
  508     * but PRMs for earlier generations say
  509     *
  510     *   "In dword format, one GRF may store up to 8 results. When the register
  511     *    is used later as a vector of Booleans, as only LSB at each channel
  512     *    contains meaning [sic] data, software should make sure all higher bits
  513     *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
  514     *
  515     * We select the representation of a true boolean uniform to match what the
  516     * CMP instruction returns.
  517     *
  518     * The Sandybridge BSpec's description of the CMP instruction matches that
  519     * of the Ivybridge PRM. (The description in the Sandybridge PRM seems
  520     * to have not been updated from Ironlake). Its CMP instruction behaves like
  521     * Ivybridge and newer.
  522     */
  523    if (brw->gen >= 6)
  524       ctx->Const.UniformBooleanTrue = ~0;
  525    else
  526       ctx->Const.UniformBooleanTrue = 1;
  527
  528    /* From the gen4 PRM, volume 4 page 127:
  529     *
  530     *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
  531     *      the base address of the first element of the surface, computed in
  532     *      software by adding the surface base address to the byte offset of
  533     *      the element in the buffer."
  534     *
  535     * However, unaligned accesses are slower, so enforce buffer alignment.
  536     */
  537    ctx->Const.UniformBufferOffsetAlignment = 16;
  538    ctx->Const.TextureBufferOffsetAlignment = 16;
  539
  540    if (brw->gen >= 6) {
  541       ctx->Const.MaxVarying = 32;
  542       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
  543       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
  544       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
  545       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
  546    }
  547
  548    /* We want the GLSL compiler to emit code that uses condition codes */
         /* The same options are applied to every shader stage, except that
          * EmitNoIndirectOutput and EmitNoIndirectTemp are only true for the
          * fragment stage.  MaxIfDepth is 16 before Gen6 and UINT_MAX from
          * Gen6 on.
          */
  549    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
  550       ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
  551       ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
  552       ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
  553       ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
  554       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
  555       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
  556          (i == MESA_SHADER_FRAGMENT);
  557       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
  558          (i == MESA_SHADER_FRAGMENT);
  559       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
  560       ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
  561    }
  562
  563    ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
  564    ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
  565
  566    /* ARB_viewport_array */
  567    if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
  568       ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
  569       ctx->Const.ViewportSubpixelBits = 0;
  570
  571       /* Cast to float before negating because MaxViewportWidth is unsigned.
  572        */
  573       ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
  574       ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
  575    }
  576
  577    /* ARB_gpu_shader5 */
  578    if (brw->gen >= 7)
  579       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
  580 }
 581
 582 /**
 583  * Process driconf (drirc) options, setting appropriate context flags.
 584  *
 585  * intelInitExtensions still pokes at optionCache directly, in order to
 586  * avoid advertising various extensions.  No flags are set, so it makes
 587  * sense to continue doing that there.
 588  */
 589 static void
 590 brw_process_driconf_options(struct brw_context *brw)
 591 {
 592    struct gl_context *ctx = &brw->ctx;
 593
 594    driOptionCache *options = &brw->optionCache;
 595    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 596                        brw->driContext->driScreenPriv->myNum, "i965");
 597
 598    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 599    switch (bo_reuse_mode) {
 600    case DRI_CONF_BO_REUSE_DISABLED:
 601       break;
 602    case DRI_CONF_BO_REUSE_ALL:
 603       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 604       break;
 605    }
 606
 607    if (!driQueryOptionb(options, "hiz")) {
 608        brw->has_hiz = false;
 609        /* On gen6, you can only do separate stencil with HIZ. */
 610        if (brw->gen == 6)
 611           brw->has_separate_stencil = false;
 612    }
 613
 614    if (driQueryOptionb(options, "always_flush_batch")) {
 615       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 616       brw->always_flush_batch = true;
 617    }
 618
 619    if (driQueryOptionb(options, "always_flush_cache")) {
 620       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 621       brw->always_flush_cache = true;
 622    }
 623
 624    if (driQueryOptionb(options, "disable_throttling")) {
 625       fprintf(stderr, "disabling flush throttling\n");
 626       brw->disable_throttling = true;
 627    }
 628
 629    brw->disable_derivative_optimization =
 630       driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
 631
 632    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 633
 634    ctx->Const.ForceGLSLExtensionsWarn =
 635       driQueryOptionb(options, "force_glsl_extensions_warn");
 636
 637    ctx->Const.DisableGLSLLineContinuations =
 638       driQueryOptionb(options, "disable_glsl_line_continuations");
 639
 640    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 641       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 642 }
 643
 644 GLboolean
 645 brwCreateContext(gl_api api,
 646                  const struct gl_config *mesaVis,
 647                  __DRIcontext *driContextPriv,
 648                  unsigned major_version,
 649                  unsigned minor_version,
 650                  uint32_t flags,
 651                  bool notify_reset,
 652                  unsigned *dri_ctx_error,
 653                  void *sharedContextPrivate)
 654 {
 655    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 656    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 657    struct intel_screen *screen = sPriv->driverPrivate;
 658    const struct brw_device_info *devinfo = screen->devinfo;
 659    struct dd_function_table functions;
 660
 661    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 662     * provides us with context reset notifications.
 663     */
 664    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 665       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 666
 667    if (screen->has_context_reset_notification)
 668       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 669
 670    if (flags & ~allowed_flags) {
 671       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 672       return false;
 673    }
 674
 675    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 676    if (!brw) {
 677       fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
 678       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 679       return false;
 680    }
 681
 682    driContextPriv->driverPrivate = brw;
 683    brw->driContext = driContextPriv;
 684    brw->intelScreen = screen;
 685    brw->bufmgr = screen->bufmgr;
 686
 687    brw->gen = devinfo->gen;
 688    brw->gt = devinfo->gt;
 689    brw->is_g4x = devinfo->is_g4x;
 690    brw->is_baytrail = devinfo->is_baytrail;
 691    brw->is_haswell = devinfo->is_haswell;
 692    brw->is_cherryview = devinfo->is_cherryview;
 693    brw->has_llc = devinfo->has_llc;
 694    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 695    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 696    brw->has_pln = devinfo->has_pln;
 697    brw->has_compr4 = devinfo->has_compr4;
 698    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 699    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 700    brw->needs_unlit_centroid_workaround =
 701       devinfo->needs_unlit_centroid_workaround;
 702
 703    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 704    brw->has_swizzling = screen->hw_has_swizzling;
 705
 706    brw->vs.base.stage = MESA_SHADER_VERTEX;
 707    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 708    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 709    if (brw->gen >= 8) {
 710       gen8_init_vtable_surface_functions(brw);
 711       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 712    } else if (brw->gen >= 7) {
 713       gen7_init_vtable_surface_functions(brw);
 714       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 715    } else if (brw->gen >= 6) {
 716       gen6_init_vtable_surface_functions(brw);
 717       brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
 718    } else {
 719       gen4_init_vtable_surface_functions(brw);
 720       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 721    }
 722
 723    brw_init_driver_functions(brw, &functions);
 724
 725    if (notify_reset)
 726       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 727
 728    struct gl_context *ctx = &brw->ctx;
 729
 730    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 731       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 732       fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
 733       intelDestroyContext(driContextPriv);
 734       return false;
 735    }
 736
 737    driContextSetFlags(ctx, flags);
 738
 739    /* Initialize the software rasterizer and helper modules.
 740     *
 741     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 742     * software fallbacks (which we have to support on legacy GL to do weird
 743     * glDrawPixels(), glBitmap(), and other functions).
 744     */
 745    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 746       _swrast_CreateContext(ctx);
 747    }
 748
 749    _vbo_CreateContext(ctx);
 750    if (ctx->swrast_context) {
 751       _tnl_CreateContext(ctx);
 752       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 753       _swsetup_CreateContext(ctx);
 754
 755       /* Configure swrast to match hardware characteristics: */
 756       _swrast_allow_pixel_fog(ctx, false);
 757       _swrast_allow_vertex_fog(ctx, true);
 758    }
 759
 760    _mesa_meta_init(ctx);
 761
 762    brw_process_driconf_options(brw);
 763    brw_process_intel_debug_variable(brw);
 764    brw_initialize_context_constants(brw);
 765
 766    ctx->Const.ResetStrategy = notify_reset
 767       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 768
 769    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 770    _mesa_init_point(ctx);
 771
 772    intel_fbo_init(brw);
 773
 774    intel_batchbuffer_init(brw);
 775
 776    if (brw->gen >= 6) {
 777       /* Create a new hardware context.  Using a hardware context means that
 778        * our GPU state will be saved/restored on context switch, allowing us
 779        * to assume that the GPU is in the same state we left it in.
 780        *
 781        * This is required for transform feedback buffer offsets, query objects,
 782        * and also allows us to reduce how much state we have to emit.
 783        */
 784       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 785
 786       if (!brw->hw_ctx) {
 787          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 788          intelDestroyContext(driContextPriv);
 789          return false;
 790       }
 791    }
 792
 793    brw_init_state(brw);
 794
 795    intelInitExtensions(ctx);
 796
 797    brw_init_surface_formats(brw);
 798
 799    brw->max_vs_threads = devinfo->max_vs_threads;
 800    brw->max_gs_threads = devinfo->max_gs_threads;
 801    brw->max_wm_threads = devinfo->max_wm_threads;
 802    brw->urb.size = devinfo->urb.size;
 803    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 804    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 805    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 806
 807    /* Estimate the size of the mappable aperture into the GTT.  There's an
 808     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 809     * It turns out it's basically always 256MB, though some ancient hardware
 810     * was smaller.
 811     */
 812    uint32_t gtt_size = 256 * 1024 * 1024;
 813
 814    /* We don't want to map two objects such that a memcpy between them would
 815     * just fault one mapping in and then the other over and over forever.  So
 816     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 817     * taken up by things like the framebuffer and the ringbuffer and such, so
 818     * be more conservative.
 819     */
 820    brw->max_gtt_map_object_size = gtt_size / 4;
 821
 822    if (brw->gen == 6)
 823       brw->urb.gs_present = false;
 824
 825    brw->prim_restart.in_progress = false;
 826    brw->prim_restart.enable_cut_index = false;
 827    brw->gs.enabled = false;
 828    brw->sf.viewport_transform_enable = true;
 829
 830    ctx->VertexProgram._MaintainTnlProgram = true;
 831    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 832
 833    brw_draw_init( brw );
 834
 835    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 836       /* Turn on some extra GL_ARB_debug_output generation. */
 837       brw->perf_debug = true;
 838    }
 839
 840    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 841       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 842
 843    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 844       brw_init_shader_time(brw);
 845
 846    _mesa_compute_version(ctx);
 847
 848    _mesa_initialize_dispatch_tables(ctx);
 849    _mesa_initialize_vbo_vtxfmt(ctx);
 850
 851    if (ctx->Extensions.AMD_performance_monitor) {
 852       brw_init_performance_monitors(brw);
 853    }
 854
 855    vbo_use_buffer_objects(ctx);
 856    vbo_always_unmap_buffers(ctx);
 857
 858    return true;
 859 }
 860
 861 void
 862 intelDestroyContext(__DRIcontext * driContextPriv)
 863 {
 864    struct brw_context *brw =
 865       (struct brw_context *) driContextPriv->driverPrivate;
 866    struct gl_context *ctx = &brw->ctx;
 867
 868    assert(brw); /* should never be null */
 869    if (!brw)
 870       return;
 871
 872    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 873    if (INTEL_DEBUG & DEBUG_AUB) {
 874       intel_batchbuffer_flush(brw);
 875       aub_dump_bmp(&brw->ctx);
 876    }
 877
 878    _mesa_meta_free(&brw->ctx);
 879    brw_meta_fast_clear_free(brw);
 880
 881    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 882       /* Force a report. */
 883       brw->shader_time.report_time = 0;
 884
 885       brw_collect_and_report_shader_time(brw);
 886       brw_destroy_shader_time(brw);
 887    }
 888
 889    brw_destroy_state(brw);
 890    brw_draw_destroy(brw);
 891
 892    drm_intel_bo_unreference(brw->curbe.curbe_bo);
 893
 894    drm_intel_gem_context_destroy(brw->hw_ctx);
 895
 896    if (ctx->swrast_context) {
 897       _swsetup_DestroyContext(&brw->ctx);
 898       _tnl_DestroyContext(&brw->ctx);
 899    }
 900    _vbo_DestroyContext(&brw->ctx);
 901
 902    if (ctx->swrast_context)
 903       _swrast_DestroyContext(&brw->ctx);
 904
 905    intel_batchbuffer_free(brw);
 906
 907    drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
 908    brw->first_post_swapbuffers_batch = NULL;
 909
 910    driDestroyOptionCache(&brw->optionCache);
 911
 912    /* free the Mesa context */
 913    _mesa_free_context_data(&brw->ctx);
 914
 915    ralloc_free(brw);
 916    driContextPriv->driverPrivate = NULL;
 917 }
 918
 919 GLboolean
 920 intelUnbindContext(__DRIcontext * driContextPriv)
 921 {
 922    /* Unset current context and dispath table */
 923    _mesa_make_current(NULL, NULL, NULL);
 924
 925    return true;
 926 }
 927
 928 /**
 929  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
 930  * on window system framebuffers.
 931  *
 932  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 933  * your renderbuffer can do sRGB encode, and you can flip a switch that does
 934  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 935  * for a visual where you're guaranteed to be capable, but it turns out that
 936  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 937  * incapable ones, becuase there's no difference between the two in resources
 938  * used.  Applications thus get built that accidentally rely on the default
 939  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 940  * great...
 941  *
 942  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 943  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 944  * So they removed the enable knob and made it "if the renderbuffer is sRGB
 945  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 946  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 947  * and get no sRGB encode (assuming that both kinds of visual are available).
 948  * Thus our choice to support sRGB by default on our visuals for desktop would
 949  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 950  *
 951  * Unfortunately, renderbuffer setup happens before a context is created.  So
 952  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 953  * context (without an sRGB visual, though we don't have sRGB visuals exposed
 954  * yet), we go turn that back off before anyone finds out.
 955  */
 956 static void
 957 intel_gles3_srgb_workaround(struct brw_context *brw,
 958                             struct gl_framebuffer *fb)
 959 {
 960    struct gl_context *ctx = &brw->ctx;
 961
 962    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
 963       return;
 964
 965    /* Some day when we support the sRGB capable bit on visuals available for
 966     * GLES, we'll need to respect that and not disable things here.
 967     */
 968    fb->Visual.sRGBCapable = false;
 969    for (int i = 0; i < BUFFER_COUNT; i++) {
 970       if (fb->Attachment[i].Renderbuffer &&
 971           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
 972          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
 973       }
 974    }
 975 }
 976
 977 GLboolean
 978 intelMakeCurrent(__DRIcontext * driContextPriv,
 979                  __DRIdrawable * driDrawPriv,
 980                  __DRIdrawable * driReadPriv)
 981 {
 982    struct brw_context *brw;
 983    GET_CURRENT_CONTEXT(curCtx);
 984
 985    if (driContextPriv)
 986       brw = (struct brw_context *) driContextPriv->driverPrivate;
 987    else
 988       brw = NULL;
 989
 990    /* According to the glXMakeCurrent() man page: "Pending commands to
 991     * the previous context, if any, are flushed before it is released."
 992     * But only flush if we're actually changing contexts.
 993     */
 994    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
 995       _mesa_flush(curCtx);
 996    }
 997
 998    if (driContextPriv) {
 999       struct gl_context *ctx = &brw->ctx;
1000       struct gl_framebuffer *fb, *readFb;
1001
1002       if (driDrawPriv == NULL) {
1003          fb = _mesa_get_incomplete_framebuffer();
1004       } else {
1005          fb = driDrawPriv->driverPrivate;
1006          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1007       }
1008
1009       if (driReadPriv == NULL) {
1010          readFb = _mesa_get_incomplete_framebuffer();
1011       } else {
1012          readFb = driReadPriv->driverPrivate;
1013          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1014       }
1015
1016       /* The sRGB workaround changes the renderbuffer's format. We must change
1017        * the format before the renderbuffer's miptree get's allocated, otherwise
1018        * the formats of the renderbuffer and its miptree will differ.
1019        */
1020       intel_gles3_srgb_workaround(brw, fb);
1021       intel_gles3_srgb_workaround(brw, readFb);
1022
1023       /* If the context viewport hasn't been initialized, force a call out to
1024        * the loader to get buffers so we have a drawable size for the initial
1025        * viewport. */
1026       if (!brw->ctx.ViewportInitialized)
1027          intel_prepare_render(brw);
1028
1029       _mesa_make_current(ctx, fb, readFb);
1030    } else {
1031       _mesa_make_current(NULL, NULL, NULL);
1032    }
1033
1034    return true;
1035 }
1036
1037 void
1038 intel_resolve_for_dri2_flush(struct brw_context *brw,
1039                              __DRIdrawable *drawable)
1040 {
1041    if (brw->gen < 6) {
1042       /* MSAA and fast color clear are not supported, so don't waste time
1043        * checking whether a resolve is needed.
1044        */
1045       return;
1046    }
1047
1048    struct gl_framebuffer *fb = drawable->driverPrivate;
1049    struct intel_renderbuffer *rb;
1050
1051    /* Usually, only the back buffer will need to be downsampled. However,
1052     * the front buffer will also need it if the user has rendered into it.
1053     */
1054    static const gl_buffer_index buffers[2] = {
1055          BUFFER_BACK_LEFT,
1056          BUFFER_FRONT_LEFT,
1057    };
1058
1059    for (int i = 0; i < 2; ++i) {
1060       rb = intel_get_renderbuffer(fb, buffers[i]);
1061       if (rb == NULL || rb->mt == NULL)
1062          continue;
1063       if (rb->mt->num_samples <= 1)
1064          intel_miptree_resolve_color(brw, rb->mt);
1065       else
1066          intel_renderbuffer_downsample(brw, rb);
1067    }
1068 }
1069
1070 static unsigned
1071 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1072 {
1073    return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1074 }
1075
1076 static void
1077 intel_query_dri2_buffers(struct brw_context *brw,
1078                          __DRIdrawable *drawable,
1079                          __DRIbuffer **buffers,
1080                          int *count);
1081
1082 static void
1083 intel_process_dri2_buffer(struct brw_context *brw,
1084                           __DRIdrawable *drawable,
1085                           __DRIbuffer *buffer,
1086                           struct intel_renderbuffer *rb,
1087                           const char *buffer_name);
1088
1089 static void
1090 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1091
1092 static void
1093 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1094 {
1095    struct gl_framebuffer *fb = drawable->driverPrivate;
1096    struct intel_renderbuffer *rb;
1097    __DRIbuffer *buffers = NULL;
1098    int i, count;
1099    const char *region_name;
1100
1101    /* Set this up front, so that in case our buffers get invalidated
1102     * while we're getting new buffers, we don't clobber the stamp and
1103     * thus ignore the invalidate. */
1104    drawable->lastStamp = drawable->dri2.stamp;
1105
1106    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1107       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1108
1109    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1110
1111    if (buffers == NULL)
1112       return;
1113
1114    for (i = 0; i < count; i++) {
1115        switch (buffers[i].attachment) {
1116        case __DRI_BUFFER_FRONT_LEFT:
1117            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1118            region_name = "dri2 front buffer";
1119            break;
1120
1121        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1122            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1123            region_name = "dri2 fake front buffer";
1124            break;
1125
1126        case __DRI_BUFFER_BACK_LEFT:
1127            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1128            region_name = "dri2 back buffer";
1129            break;
1130
1131        case __DRI_BUFFER_DEPTH:
1132        case __DRI_BUFFER_HIZ:
1133        case __DRI_BUFFER_DEPTH_STENCIL:
1134        case __DRI_BUFFER_STENCIL:
1135        case __DRI_BUFFER_ACCUM:
1136        default:
1137            fprintf(stderr,
1138                    "unhandled buffer attach event, attachment type %d\n",
1139                    buffers[i].attachment);
1140            return;
1141        }
1142
1143        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1144    }
1145
1146 }
1147
1148 void
1149 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1150 {
1151    struct brw_context *brw = context->driverPrivate;
1152    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1153
1154    /* Set this up front, so that in case our buffers get invalidated
1155     * while we're getting new buffers, we don't clobber the stamp and
1156     * thus ignore the invalidate. */
1157    drawable->lastStamp = drawable->dri2.stamp;
1158
1159    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1160       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1161
1162    if (screen->image.loader)
1163       intel_update_image_buffers(brw, drawable);
1164    else
1165       intel_update_dri2_buffers(brw, drawable);
1166
1167    driUpdateFramebufferSize(&brw->ctx, drawable);
1168 }
1169
1170 /**
1171  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1172  * state is required.
1173  */
1174 void
1175 intel_prepare_render(struct brw_context *brw)
1176 {
1177    struct gl_context *ctx = &brw->ctx;
1178    __DRIcontext *driContext = brw->driContext;
1179    __DRIdrawable *drawable;
1180
1181    drawable = driContext->driDrawablePriv;
1182    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1183       if (drawable->lastStamp != drawable->dri2.stamp)
1184          intel_update_renderbuffers(driContext, drawable);
1185       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1186    }
1187
1188    drawable = driContext->driReadablePriv;
1189    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1190       if (drawable->lastStamp != drawable->dri2.stamp)
1191          intel_update_renderbuffers(driContext, drawable);
1192       driContext->dri2.read_stamp = drawable->dri2.stamp;
1193    }
1194
1195    /* If we're currently rendering to the front buffer, the rendering
1196     * that will happen next will probably dirty the front buffer.  So
1197     * mark it as dirty here.
1198     */
1199    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
1200       brw->front_buffer_dirty = true;
1201
1202    /* Wait for the swapbuffers before the one we just emitted, so we
1203     * don't get too many swaps outstanding for apps that are GPU-heavy
1204     * but not CPU-heavy.
1205     *
1206     * We're using intelDRI2Flush (called from the loader before
1207     * swapbuffer) and glFlush (for front buffer rendering) as the
1208     * indicator that a frame is done and then throttle when we get
1209     * here as we prepare to render the next frame.  At this point for
1210     * round trips for swap/copy and getting new buffers are done and
1211     * we'll spend less time waiting on the GPU.
1212     *
1213     * Unfortunately, we don't have a handle to the batch containing
1214     * the swap, and getting our hands on that doesn't seem worth it,
1215     * so we just us the first batch we emitted after the last swap.
1216     */
1217    if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
1218       if (!brw->disable_throttling)
1219          drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
1220       drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
1221       brw->first_post_swapbuffers_batch = NULL;
1222       brw->need_throttle = false;
1223    }
1224 }
1225
1226 /**
1227  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1228  *
1229  * To determine which DRI buffers to request, examine the renderbuffers
1230  * attached to the drawable's framebuffer. Then request the buffers with
1231  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1232  *
1233  * This is called from intel_update_renderbuffers().
1234  *
1235  * \param drawable      Drawable whose buffers are queried.
1236  * \param buffers       [out] List of buffers returned by DRI2 query.
1237  * \param buffer_count  [out] Number of buffers returned.
1238  *
1239  * \see intel_update_renderbuffers()
1240  * \see DRI2GetBuffers()
1241  * \see DRI2GetBuffersWithFormat()
1242  */
1243 static void
1244 intel_query_dri2_buffers(struct brw_context *brw,
1245                          __DRIdrawable *drawable,
1246                          __DRIbuffer **buffers,
1247                          int *buffer_count)
1248 {
1249    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1250    struct gl_framebuffer *fb = drawable->driverPrivate;
1251    int i = 0;
1252    unsigned attachments[8];
1253
1254    struct intel_renderbuffer *front_rb;
1255    struct intel_renderbuffer *back_rb;
1256
1257    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1258    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1259
1260    memset(attachments, 0, sizeof(attachments));
1261    if ((brw_is_front_buffer_drawing(fb) ||
1262         brw_is_front_buffer_reading(fb) ||
1263         !back_rb) && front_rb) {
1264       /* If a fake front buffer is in use, then querying for
1265        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1266        * the real front buffer to the fake front buffer.  So before doing the
1267        * query, we need to make sure all the pending drawing has landed in the
1268        * real front buffer.
1269        */
1270       intel_batchbuffer_flush(brw);
1271       intel_flush_front(&brw->ctx);
1272
1273       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1274       attachments[i++] = intel_bits_per_pixel(front_rb);
1275    } else if (front_rb && brw->front_buffer_dirty) {
1276       /* We have pending front buffer rendering, but we aren't querying for a
1277        * front buffer.  If the front buffer we have is a fake front buffer,
1278        * the X server is going to throw it away when it processes the query.
1279        * So before doing the query, make sure all the pending drawing has
1280        * landed in the real front buffer.
1281        */
1282       intel_batchbuffer_flush(brw);
1283       intel_flush_front(&brw->ctx);
1284    }
1285
1286    if (back_rb) {
1287       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1288       attachments[i++] = intel_bits_per_pixel(back_rb);
1289    }
1290
1291    assert(i <= ARRAY_SIZE(attachments));
1292
1293    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1294                                                         &drawable->w,
1295                                                         &drawable->h,
1296                                                         attachments, i / 2,
1297                                                         buffer_count,
1298                                                         drawable->loaderPrivate);
1299 }
1300
1301 /**
1302  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1303  *
1304  * This is called from intel_update_renderbuffers().
1305  *
1306  * \par Note:
1307  *    DRI buffers whose attachment point is DRI2BufferStencil or
1308  *    DRI2BufferDepthStencil are handled as special cases.
1309  *
1310  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1311  *        that is passed to drm_intel_bo_gem_create_from_name().
1312  *
1313  * \see intel_update_renderbuffers()
1314  */
1315 static void
1316 intel_process_dri2_buffer(struct brw_context *brw,
1317                           __DRIdrawable *drawable,
1318                           __DRIbuffer *buffer,
1319                           struct intel_renderbuffer *rb,
1320                           const char *buffer_name)
1321 {
1322    struct gl_framebuffer *fb = drawable->driverPrivate;
1323    drm_intel_bo *bo;
1324
1325    if (!rb)
1326       return;
1327
1328    unsigned num_samples = rb->Base.Base.NumSamples;
1329
1330    /* We try to avoid closing and reopening the same BO name, because the first
1331     * use of a mapping of the buffer involves a bunch of page faulting which is
1332     * moderately expensive.
1333     */
1334    struct intel_mipmap_tree *last_mt;
1335    if (num_samples == 0)
1336       last_mt = rb->mt;
1337    else
1338       last_mt = rb->singlesample_mt;
1339
1340    uint32_t old_name = 0;
1341    if (last_mt) {
1342        /* The bo already has a name because the miptree was created by a
1343         * previous call to intel_process_dri2_buffer(). If a bo already has a
1344         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1345         * create a new name.
1346         */
1347       drm_intel_bo_flink(last_mt->bo, &old_name);
1348    }
1349
1350    if (old_name == buffer->name)
1351       return;
1352
1353    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1354       fprintf(stderr,
1355               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1356               buffer->name, buffer->attachment,
1357               buffer->cpp, buffer->pitch);
1358    }
1359
1360    intel_miptree_release(&rb->mt);
1361    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1362                                           buffer->name);
1363    if (!bo) {
1364       fprintf(stderr,
1365               "Failed to open BO for returned DRI2 buffer "
1366               "(%dx%d, %s, named %d).\n"
1367               "This is likely a bug in the X Server that will lead to a "
1368               "crash soon.\n",
1369               drawable->w, drawable->h, buffer_name, buffer->name);
1370       return;
1371    }
1372
1373    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1374                                             drawable->w, drawable->h,
1375                                             buffer->pitch);
1376
1377    if (brw_is_front_buffer_drawing(fb) &&
1378        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1379         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1380        rb->Base.Base.NumSamples > 1) {
1381       intel_renderbuffer_upsample(brw, rb);
1382    }
1383
1384    assert(rb->mt);
1385
1386    drm_intel_bo_unreference(bo);
1387 }
1388
1389 /**
1390  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1391  *
1392  * To determine which DRI buffers to request, examine the renderbuffers
1393  * attached to the drawable's framebuffer. Then request the buffers from
1394  * the image loader
1395  *
1396  * This is called from intel_update_renderbuffers().
1397  *
1398  * \param drawable      Drawable whose buffers are queried.
1399  * \param buffers       [out] List of buffers returned by DRI2 query.
1400  * \param buffer_count  [out] Number of buffers returned.
1401  *
1402  * \see intel_update_renderbuffers()
1403  */
1404
1405 static void
1406 intel_update_image_buffer(struct brw_context *intel,
1407                           __DRIdrawable *drawable,
1408                           struct intel_renderbuffer *rb,
1409                           __DRIimage *buffer,
1410                           enum __DRIimageBufferMask buffer_type)
1411 {
1412    struct gl_framebuffer *fb = drawable->driverPrivate;
1413
1414    if (!rb || !buffer->bo)
1415       return;
1416
1417    unsigned num_samples = rb->Base.Base.NumSamples;
1418
1419    /* Check and see if we're already bound to the right
1420     * buffer object
1421     */
1422    struct intel_mipmap_tree *last_mt;
1423    if (num_samples == 0)
1424       last_mt = rb->mt;
1425    else
1426       last_mt = rb->singlesample_mt;
1427
1428    if (last_mt && last_mt->bo == buffer->bo)
1429       return;
1430
1431    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1432                                             buffer->width, buffer->height,
1433                                             buffer->pitch);
1434
1435    if (brw_is_front_buffer_drawing(fb) &&
1436        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1437        rb->Base.Base.NumSamples > 1) {
1438       intel_renderbuffer_upsample(intel, rb);
1439    }
1440 }
1441
1442 static void
1443 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1444 {
1445    struct gl_framebuffer *fb = drawable->driverPrivate;
1446    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1447    struct intel_renderbuffer *front_rb;
1448    struct intel_renderbuffer *back_rb;
1449    struct __DRIimageList images;
1450    unsigned int format;
1451    uint32_t buffer_mask = 0;
1452
1453    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1454    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1455
1456    if (back_rb)
1457       format = intel_rb_format(back_rb);
1458    else if (front_rb)
1459       format = intel_rb_format(front_rb);
1460    else
1461       return;
1462
1463    if (front_rb && (brw_is_front_buffer_drawing(fb) ||
1464                     brw_is_front_buffer_reading(fb) || !back_rb)) {
1465       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1466    }
1467
1468    if (back_rb)
1469       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1470
1471    (*screen->image.loader->getBuffers) (drawable,
1472                                         driGLFormatToImageFormat(format),
1473                                         &drawable->dri2.stamp,
1474                                         drawable->loaderPrivate,
1475                                         buffer_mask,
1476                                         &images);
1477
1478    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1479       drawable->w = images.front->width;
1480       drawable->h = images.front->height;
1481       intel_update_image_buffer(brw,
1482                                 drawable,
1483                                 front_rb,
1484                                 images.front,
1485                                 __DRI_IMAGE_BUFFER_FRONT);
1486    }
1487    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1488       drawable->w = images.back->width;
1489       drawable->h = images.back->height;
1490       intel_update_image_buffer(brw,
1491                                 drawable,
1492                                 back_rb,
1493                                 images.back,
1494                                 __DRI_IMAGE_BUFFER_BACK);
1495    }
1496 }