src/mesa/drivers/dri/i965/brw_meta_fast_clear.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "main/mtypes.h"
  25 #include "main/macros.h"
  26 #include "main/context.h"
  27 #include "main/objectlabel.h"
  28 #include "main/shaderapi.h"
  29 #include "main/arrayobj.h"
  30 #include "main/bufferobj.h"
  31 #include "main/buffers.h"
  32 #include "main/blend.h"
  33 #include "main/enable.h"
  34 #include "main/depth.h"
  35 #include "main/stencil.h"
  36 #include "main/varray.h"
  37 #include "main/uniforms.h"
  38 #include "main/fbobject.h"
  39 #include "main/texobj.h"
  40
  41 #include "main/api_validate.h"
  42 #include "main/state.h"
  43
  44 #include "util/format_srgb.h"
  45
  46 #include "vbo/vbo_context.h"
  47
  48 #include "drivers/common/meta.h"
  49
  50 #include "brw_defines.h"
  51 #include "brw_context.h"
  52 #include "brw_draw.h"
  53 #include "brw_state.h"
  54 #include "intel_fbo.h"
  55 #include "intel_batchbuffer.h"
  56
  57 #include "brw_blorp.h"
  58
  59 struct brw_fast_clear_state {
  60    struct gl_buffer_object *buf_obj;
  61    struct gl_vertex_array_object *array_obj;
  62    GLuint vao;
  63    GLuint shader_prog;
  64    GLint color_location;
  65 };
  66
  67 static bool
  68 brw_fast_clear_init(struct brw_context *brw)
  69 {
  70    struct brw_fast_clear_state *clear;
  71    struct gl_context *ctx = &brw->ctx;
  72
  73    if (brw->fast_clear_state) {
  74       clear = brw->fast_clear_state;
  75       _mesa_BindVertexArray(clear->vao);
  76       return true;
  77    }
  78
  79    brw->fast_clear_state = clear = malloc(sizeof *clear);
  80    if (clear == NULL)
  81       return false;
  82
  83    memset(clear, 0, sizeof *clear);
  84    _mesa_GenVertexArrays(1, &clear->vao);
  85    _mesa_BindVertexArray(clear->vao);
  86
  87    clear->buf_obj = ctx->Driver.NewBufferObject(ctx, 0xDEADBEEF);
  88    if (clear->buf_obj == NULL)
  89       return false;
  90
  91    clear->array_obj = _mesa_lookup_vao(ctx, clear->vao);
  92    assert(clear->array_obj != NULL);
  93
  94    _mesa_update_array_format(ctx, clear->array_obj, VERT_ATTRIB_GENERIC(0),
  95                              2, GL_FLOAT, GL_RGBA, GL_FALSE, GL_FALSE, GL_FALSE,
  96                              0, true);
  97    _mesa_bind_vertex_buffer(ctx, clear->array_obj, VERT_ATTRIB_GENERIC(0),
  98                             clear->buf_obj, 0, sizeof(float) * 2);
  99    _mesa_enable_vertex_array_attrib(ctx, clear->array_obj,
 100                                     VERT_ATTRIB_GENERIC(0));
 101
 102    return true;
 103 }
 104
 105 static void
 106 brw_bind_rep_write_shader(struct brw_context *brw, float *color)
 107 {
 108    const char *vs_source =
 109       "#extension GL_AMD_vertex_shader_layer : enable\n"
 110       "#extension GL_ARB_draw_instanced : enable\n"
 111       "attribute vec4 position;\n"
 112       "uniform int layer;\n"
 113       "void main()\n"
 114       "{\n"
 115       "#ifdef GL_AMD_vertex_shader_layer\n"
 116       "   gl_Layer = gl_InstanceID;\n"
 117       "#endif\n"
 118       "   gl_Position = position;\n"
 119       "}\n";
 120    const char *fs_source =
 121       "uniform vec4 color;\n"
 122       "void main()\n"
 123       "{\n"
 124       "   gl_FragColor = color;\n"
 125       "}\n";
 126
 127    GLuint vs, fs;
 128    struct brw_fast_clear_state *clear = brw->fast_clear_state;
 129    struct gl_context *ctx = &brw->ctx;
 130
 131    if (clear->shader_prog) {
 132       _mesa_UseProgram(clear->shader_prog);
 133       _mesa_Uniform4fv(clear->color_location, 1, color);
 134       return;
 135    }
 136
 137    vs = _mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER, vs_source);
 138    fs = _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER, fs_source);
 139
 140    clear->shader_prog = _mesa_CreateProgram();
 141    _mesa_AttachShader(clear->shader_prog, fs);
 142    _mesa_DeleteShader(fs);
 143    _mesa_AttachShader(clear->shader_prog, vs);
 144    _mesa_DeleteShader(vs);
 145    _mesa_BindAttribLocation(clear->shader_prog, 0, "position");
 146    _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta repclear");
 147    _mesa_LinkProgram(clear->shader_prog);
 148
 149    clear->color_location =
 150       _mesa_GetUniformLocation(clear->shader_prog, "color");
 151
 152    _mesa_UseProgram(clear->shader_prog);
 153    _mesa_Uniform4fv(clear->color_location, 1, color);
 154 }
 155
 156 void
 157 brw_meta_fast_clear_free(struct brw_context *brw)
 158 {
 159    struct brw_fast_clear_state *clear = brw->fast_clear_state;
 160    GET_CURRENT_CONTEXT(old_context);
 161
 162    if (clear == NULL)
 163       return;
 164
 165    _mesa_make_current(&brw->ctx, NULL, NULL);
 166
 167    _mesa_DeleteVertexArrays(1, &clear->vao);
 168    _mesa_reference_buffer_object(&brw->ctx, &clear->buf_obj, NULL);
 169    _mesa_DeleteProgram(clear->shader_prog);
 170    free(clear);
 171
 172    if (old_context)
 173       _mesa_make_current(old_context, old_context->WinSysDrawBuffer, old_context->WinSysReadBuffer);
 174    else
 175       _mesa_make_current(NULL, NULL, NULL);
 176 }
 177
 178 struct rect {
 179    int x0, y0, x1, y1;
 180 };
 181
 182 static void
 183 brw_draw_rectlist(struct brw_context *brw, struct rect *rect, int num_instances)
 184 {
 185    struct gl_context *ctx = &brw->ctx;
 186    struct brw_fast_clear_state *clear = brw->fast_clear_state;
 187    int start = 0, count = 3;
 188    struct _mesa_prim prim;
 189    float verts[6];
 190
 191    verts[0] = rect->x1;
 192    verts[1] = rect->y1;
 193    verts[2] = rect->x0;
 194    verts[3] = rect->y1;
 195    verts[4] = rect->x0;
 196    verts[5] = rect->y0;
 197
 198    /* upload new vertex data */
 199    _mesa_buffer_data(ctx, clear->buf_obj, GL_NONE, sizeof(verts), verts,
 200                      GL_DYNAMIC_DRAW, __func__);
 201
 202    if (ctx->NewState)
 203       _mesa_update_state(ctx);
 204
 205    vbo_bind_arrays(ctx);
 206
 207    memset(&prim, 0, sizeof prim);
 208    prim.begin = 1;
 209    prim.end = 1;
 210    prim.mode = BRW_PRIM_OFFSET + _3DPRIM_RECTLIST;
 211    prim.num_instances = num_instances;
 212    prim.start = start;
 213    prim.count = count;
 214
 215    /* Make sure our internal prim value doesn't clash with a valid GL value. */
 216    assert(!_mesa_is_valid_prim_mode(ctx, prim.mode));
 217
 218    brw_draw_prims(ctx, &prim, 1, NULL,
 219                   GL_TRUE, start, start + count - 1,
 220                   NULL, 0, NULL);
 221 }
 222
 223 static void
 224 get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
 225                     struct intel_renderbuffer *irb, struct rect *rect)
 226 {
 227    unsigned int x_align, y_align;
 228    unsigned int x_scaledown, y_scaledown;
 229
 230    if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) {
 231       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 232        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
 233        *
 234        *     Clear pass must have a clear rectangle that must follow
 235        *     alignment rules in terms of pixels and lines as shown in the
 236        *     table below. Further, the clear-rectangle height and width
 237        *     must be multiple of the following dimensions. If the height
 238        *     and width of the render target being cleared do not meet these
 239        *     requirements, an MCS buffer can be created such that it
 240        *     follows the requirement and covers the RT.
 241        *
 242        * The alignment size in the table that follows is related to the
 243        * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
 244        * with X alignment multiplied by 16 and Y alignment multiplied by 32.
 245        */
 246       intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
 247       x_align *= 16;
 248
 249       /* SKL+ line alignment requirement for Y-tiled are half those of the prior
 250        * generations.
 251        */
 252       if (brw->gen >= 9)
 253          y_align *= 16;
 254       else
 255          y_align *= 32;
 256
 257       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 258        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
 259        *
 260        *     In order to optimize the performance MCS buffer (when bound to
 261        *     1X RT) clear similarly to MCS buffer clear for MSRT case,
 262        *     clear rect is required to be scaled by the following factors
 263        *     in the horizontal and vertical directions:
 264        *
 265        * The X and Y scale down factors in the table that follows are each
 266        * equal to half the alignment value computed above.
 267        */
 268       x_scaledown = x_align / 2;
 269       y_scaledown = y_align / 2;
 270
 271       /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
 272        * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color
 273        * Clear of Non-MultiSampled Render Target Restrictions":
 274        *
 275        *   Clear rectangle must be aligned to two times the number of
 276        *   pixels in the table shown below due to 16x16 hashing across the
 277        *   slice.
 278        */
 279       x_align *= 2;
 280       y_align *= 2;
 281    } else {
 282       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 283        * Target(s)", beneath the "MSAA Compression" bullet (p326):
 284        *
 285        *     Clear pass for this case requires that scaled down primitive
 286        *     is sent down with upper left co-ordinate to coincide with
 287        *     actual rectangle being cleared. For MSAA, clear rectangle’s
 288        *     height and width need to as show in the following table in
 289        *     terms of (width,height) of the RT.
 290        *
 291        *     MSAA  Width of Clear Rect  Height of Clear Rect
 292        *      2X     Ceil(1/8*width)      Ceil(1/2*height)
 293        *      4X     Ceil(1/8*width)      Ceil(1/2*height)
 294        *      8X     Ceil(1/2*width)      Ceil(1/2*height)
 295        *     16X         width            Ceil(1/2*height)
 296        *
 297        * The text "with upper left co-ordinate to coincide with actual
 298        * rectangle being cleared" is a little confusing--it seems to imply
 299        * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to
 300        * feed the pipeline using the rectangle (x,y) to
 301        * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on
 302        * the number of samples.  Experiments indicate that this is not
 303        * quite correct; actually, what the hardware appears to do is to
 304        * align whatever rectangle is sent down the pipeline to the nearest
 305        * multiple of 2x2 blocks, and then scale it up by a factor of N
 306        * horizontally and 2 vertically.  So the resulting alignment is 4
 307        * vertically and either 4 or 16 horizontally, and the scaledown
 308        * factor is 2 vertically and either 2 or 8 horizontally.
 309        */
 310       switch (irb->mt->num_samples) {
 311       case 2:
 312       case 4:
 313          x_scaledown = 8;
 314          break;
 315       case 8:
 316          x_scaledown = 2;
 317          break;
 318       case 16:
 319          x_scaledown = 1;
 320          break;
 321       default:
 322          unreachable("Unexpected sample count for fast clear");
 323       }
 324       y_scaledown = 2;
 325       x_align = x_scaledown * 2;
 326       y_align = y_scaledown * 2;
 327    }
 328
 329    rect->x0 = fb->_Xmin;
 330    rect->x1 = fb->_Xmax;
 331    if (fb->Name != 0) {
 332       rect->y0 = fb->_Ymin;
 333       rect->y1 = fb->_Ymax;
 334    } else {
 335       rect->y0 = fb->Height - fb->_Ymax;
 336       rect->y1 = fb->Height - fb->_Ymin;
 337    }
 338
 339    rect->x0 = ROUND_DOWN_TO(rect->x0,  x_align) / x_scaledown;
 340    rect->y0 = ROUND_DOWN_TO(rect->y0, y_align) / y_scaledown;
 341    rect->x1 = ALIGN(rect->x1, x_align) / x_scaledown;
 342    rect->y1 = ALIGN(rect->y1, y_align) / y_scaledown;
 343 }
 344
 345 static void
 346 get_buffer_rect(const struct gl_framebuffer *fb, struct rect *rect)
 347 {
 348    rect->x0 = fb->_Xmin;
 349    rect->x1 = fb->_Xmax;
 350    if (fb->Name != 0) {
 351       rect->y0 = fb->_Ymin;
 352       rect->y1 = fb->_Ymax;
 353    } else {
 354       rect->y0 = fb->Height - fb->_Ymax;
 355       rect->y1 = fb->Height - fb->_Ymin;
 356    }
 357 }
 358
 359 /**
 360  * Determine if fast color clear supports the given clear color.
 361  *
 362  * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
 363  * moment we only support floating point, unorm, and snorm buffers.
 364  */
 365 static bool
 366 is_color_fast_clear_compatible(struct brw_context *brw,
 367                                mesa_format format,
 368                                const union gl_color_union *color)
 369 {
 370    if (_mesa_is_format_integer_color(format)) {
 371       if (brw->gen >= 8) {
 372          perf_debug("Integer fast clear not enabled for (%s)",
 373                     _mesa_get_format_name(format));
 374       }
 375       return false;
 376    }
 377
 378    for (int i = 0; i < 4; i++) {
 379       if (!_mesa_format_has_color_component(format, i)) {
 380          continue;
 381       }
 382
 383       if (brw->gen < 9 &&
 384           color->f[i] != 0.0f && color->f[i] != 1.0f) {
 385          return false;
 386       }
 387    }
 388    return true;
 389 }
 390
 391 /**
 392  * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
 393  * SURFACE_STATE (DWORD 12-15 on SKL+).
 394  */
 395 static void
 396 set_fast_clear_color(struct brw_context *brw,
 397                      struct intel_mipmap_tree *mt,
 398                      const union gl_color_union *color)
 399 {
 400    union gl_color_union override_color = *color;
 401
 402    /* The sampler doesn't look at the format of the surface when the fast
 403     * clear color is used so we need to implement luminance, intensity and
 404     * missing components manually.
 405     */
 406    switch (_mesa_get_format_base_format(mt->format)) {
 407    case GL_INTENSITY:
 408       override_color.ui[3] = override_color.ui[0];
 409       /* flow through */
 410    case GL_LUMINANCE:
 411    case GL_LUMINANCE_ALPHA:
 412       override_color.ui[1] = override_color.ui[0];
 413       override_color.ui[2] = override_color.ui[0];
 414       break;
 415    default:
 416       for (int i = 0; i < 3; i++) {
 417          if (!_mesa_format_has_color_component(mt->format, i))
 418             override_color.ui[i] = 0;
 419       }
 420       break;
 421    }
 422
 423    if (!_mesa_format_has_color_component(mt->format, 3)) {
 424       if (_mesa_is_format_integer_color(mt->format))
 425          override_color.ui[3] = 1;
 426       else
 427          override_color.f[3] = 1.0f;
 428    }
 429
 430    /* Handle linear→SRGB conversion */
 431    if (brw->ctx.Color.sRGBEnabled &&
 432        _mesa_get_srgb_format_linear(mt->format) != mt->format) {
 433       for (int i = 0; i < 3; i++) {
 434          override_color.f[i] =
 435             util_format_linear_to_srgb_float(override_color.f[i]);
 436       }
 437    }
 438
 439    if (brw->gen >= 9) {
 440       mt->gen9_fast_clear_color = override_color;
 441    } else {
 442       mt->fast_clear_color_value = 0;
 443       for (int i = 0; i < 4; i++) {
 444          /* Testing for non-0 works for integer and float colors */
 445          if (override_color.f[i] != 0.0f) {
 446              mt->fast_clear_color_value |=
 447                 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
 448          }
 449       }
 450    }
 451 }
 452
 453 static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 };
 454
 455 static void
 456 set_fast_clear_op(struct brw_context *brw, uint32_t op)
 457 {
 458    /* Set op and dirty BRW_NEW_FRAGMENT_PROGRAM to make sure we re-emit
 459     * 3DSTATE_PS.
 460     */
 461    brw->wm.fast_clear_op = op;
 462    brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 463 }
 464
 465 static void
 466 use_rectlist(struct brw_context *brw, bool enable)
 467 {
 468    /* Set custom state to let us use _3DPRIM_RECTLIST and the replicated
 469     * rendertarget write.  When we enable reclist mode, we disable the
 470     * viewport transform, disable clipping, enable the rep16 write
 471     * optimization and disable simd8 dispatch in the PS.
 472     */
 473    brw->sf.viewport_transform_enable = !enable;
 474    brw->use_rep_send = enable;
 475    brw->no_simd8 = enable;
 476
 477    /* Dirty state to make sure we reemit the state packages affected by the
 478     * custom state.  We dirty BRW_NEW_FRAGMENT_PROGRAM to emit 3DSTATE_PS for
 479     * disabling simd8 dispatch, _NEW_LIGHT to emit 3DSTATE_SF for disabling
 480     * the viewport transform and 3DSTATE_CLIP to disable clipping for the
 481     * reclist primitive.  This is a little messy - it would be nicer to
 482     * BRW_NEW_FAST_CLEAR flag or so, but we're out of brw state bits.  Dirty
 483     * _NEW_BUFFERS to make sure we emit new SURFACE_STATE with the new fast
 484     * clear color value.
 485     */
 486    brw->NewGLState |= _NEW_LIGHT | _NEW_BUFFERS;
 487    brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 488 }
 489
 490 /**
 491  * Individually fast clear each color buffer attachment. On previous gens this
 492  * isn't required. The motivation for this comes from one line (which seems to
 493  * be specific to SKL+). The list item is in section titled _MCS Buffer for
 494  * Render Target(s)_
 495  *
 496  *   "Since only one RT is bound with a clear pass, only one RT can be cleared
 497  *   at a time. To clear multiple RTs, multiple clear passes are required."
 498  *
 499  * The code follows the same idea as the resolve code which creates a fake FBO
 500  * to avoid interfering with too much of the GL state.
 501  */
 502 static void
 503 fast_clear_attachments(struct brw_context *brw,
 504                        struct gl_framebuffer *fb,
 505                        uint32_t fast_clear_buffers,
 506                        struct rect fast_clear_rect)
 507 {
 508    struct gl_context *ctx = &brw->ctx;
 509    const bool srgb_enabled = ctx->Color.sRGBEnabled;
 510
 511    assert(brw->gen >= 9);
 512
 513    /* Make sure the GL_FRAMEBUFFER_SRGB is disabled during fast clear so that
 514     * the surface state will always be uploaded with a linear buffer. SRGB
 515     * buffers are not supported on Gen9 because they are not marked as
 516     * losslessly compressible. This shouldn't matter for the fast clear
 517     * because the color is not written to the framebuffer yet so the hardware
 518     * doesn't need to do any SRGB conversion.
 519     */
 520    if (srgb_enabled)
 521       _mesa_set_framebuffer_srgb(ctx, GL_FALSE);
 522
 523    brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
 524
 525    /* SKL+ also has a resolve mode for compressed render targets and thus more
 526     * bits to let us select the type of resolve.  For fast clear resolves, it
 527     * turns out we can use the same value as pre-SKL though.
 528     */
 529    set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
 530
 531    while (fast_clear_buffers) {
 532       int index = ffs(fast_clear_buffers) - 1;
 533
 534       fast_clear_buffers &= ~(1 << index);
 535
 536       _mesa_meta_drawbuffers_from_bitfield(1 << index);
 537
 538       brw_draw_rectlist(brw, &fast_clear_rect, MAX2(1, fb->MaxNumLayers));
 539
 540       /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
 541        * resolve them eventually.
 542        */
 543       struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[0];
 544       struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 545       irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
 546    }
 547
 548    set_fast_clear_op(brw, 0);
 549
 550    if (srgb_enabled)
 551       _mesa_set_framebuffer_srgb(ctx, GL_TRUE);
 552 }
 553
 554 bool
 555 brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
 556                     GLbitfield buffers, bool partial_clear)
 557 {
 558    struct gl_context *ctx = &brw->ctx;
 559    mesa_format format;
 560    enum { FAST_CLEAR, REP_CLEAR, PLAIN_CLEAR } clear_type;
 561    GLbitfield plain_clear_buffers, meta_save, rep_clear_buffers, fast_clear_buffers;
 562    struct rect fast_clear_rect, clear_rect;
 563    int layers;
 564
 565    fast_clear_buffers = rep_clear_buffers = plain_clear_buffers = 0;
 566
 567    /* First we loop through the color draw buffers and determine which ones
 568     * can be fast cleared, which ones can use the replicated write and which
 569     * ones have to fall back to regular color clear.
 570     */
 571    for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
 572       struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
 573       struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 574       int index = fb->_ColorDrawBufferIndexes[buf];
 575
 576       /* Only clear the buffers present in the provided mask */
 577       if (((1 << index) & buffers) == 0)
 578          continue;
 579
 580       /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
 581        * the framebuffer can be complete with some attachments missing.  In
 582        * this case the _ColorDrawBuffers pointer will be NULL.
 583        */
 584       if (rb == NULL)
 585          continue;
 586
 587       clear_type = FAST_CLEAR;
 588
 589       /* We don't have fast clear until gen7. */
 590       if (brw->gen < 7)
 591          clear_type = REP_CLEAR;
 592
 593       /* If we're mapping the render format to a different format than the
 594        * format we use for texturing then it is a bit questionable whether it
 595        * should be possible to use a fast clear. Although we only actually
 596        * render using a renderable format, without the override workaround it
 597        * wouldn't be possible to have a non-renderable surface in a fast clear
 598        * state so the hardware probably legitimately doesn't need to support
 599        * this case. At least on Gen9 this really does seem to cause problems.
 600        */
 601       if (brw->gen >= 9 &&
 602           brw_format_for_mesa_format(irb->mt->format) !=
 603           brw->render_target_format[irb->mt->format])
 604          clear_type = REP_CLEAR;
 605
 606       /* Gen9 doesn't support fast clear on single-sampled SRGB buffers. When
 607        * GL_FRAMEBUFFER_SRGB is enabled any color renderbuffers will be
 608        * resolved in intel_update_state. In that case it's pointless to do a
 609        * fast clear because it's very likely to be immediately resolved.
 610        */
 611       if (brw->gen >= 9 &&
 612           irb->mt->num_samples <= 1 &&
 613           brw->ctx.Color.sRGBEnabled &&
 614           _mesa_get_srgb_format_linear(irb->mt->format) != irb->mt->format)
 615          clear_type = REP_CLEAR;
 616
 617       if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
 618          clear_type = REP_CLEAR;
 619
 620       /* We can't do scissored fast clears because of the restrictions on the
 621        * fast clear rectangle size.
 622        */
 623       if (partial_clear)
 624          clear_type = REP_CLEAR;
 625
 626       /* Fast clear is only supported for colors where all components are
 627        * either 0 or 1.
 628        */
 629       format = _mesa_get_render_format(ctx, irb->mt->format);
 630       if (!is_color_fast_clear_compatible(brw, format, &ctx->Color.ClearColor))
 631          clear_type = REP_CLEAR;
 632
 633       /* From the SNB PRM (Vol4_Part1):
 634        *
 635        *     "Replicated data (Message Type = 111) is only supported when
 636        *      accessing tiled memory.  Using this Message Type to access
 637        *      linear (untiled) memory is UNDEFINED."
 638        */
 639       if (irb->mt->tiling == I915_TILING_NONE) {
 640          perf_debug("Falling back to plain clear because %dx%d buffer is untiled\n",
 641                     irb->mt->logical_width0, irb->mt->logical_height0);
 642          clear_type = PLAIN_CLEAR;
 643       }
 644
 645       /* Constant color writes ignore everything in blend and color calculator
 646        * state.  This is not documented.
 647        */
 648       GLubyte *color_mask = ctx->Color.ColorMask[buf];
 649       for (int i = 0; i < 4; i++) {
 650          if (_mesa_format_has_color_component(irb->mt->format, i) &&
 651              !color_mask[i]) {
 652             perf_debug("Falling back to plain clear on %dx%d buffer because of color mask\n",
 653                        irb->mt->logical_width0, irb->mt->logical_height0);
 654             clear_type = PLAIN_CLEAR;
 655          }
 656       }
 657
 658       /* Allocate the MCS for non MSRT surfaces now if we're doing a fast
 659        * clear and we don't have the MCS yet.  On failure, fall back to
 660        * replicated clear.
 661        */
 662       if (clear_type == FAST_CLEAR && irb->mt->mcs_mt == NULL)
 663          if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt))
 664             clear_type = REP_CLEAR;
 665
 666       switch (clear_type) {
 667       case FAST_CLEAR:
 668          set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
 669          irb->need_downsample = true;
 670
 671          /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
 672           * clear is redundant and can be skipped.  Only skip after we've
 673           * updated the fast clear color above though.
 674           */
 675          if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR)
 676             continue;
 677
 678          /* Set fast_clear_state to RESOLVED so we don't try resolve them when
 679           * we draw, in case the mt is also bound as a texture.
 680           */
 681          irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 682          irb->need_downsample = true;
 683          fast_clear_buffers |= 1 << index;
 684          get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
 685          break;
 686
 687       case REP_CLEAR:
 688          rep_clear_buffers |= 1 << index;
 689          get_buffer_rect(fb, &clear_rect);
 690          break;
 691
 692       case PLAIN_CLEAR:
 693          plain_clear_buffers |= 1 << index;
 694          get_buffer_rect(fb, &clear_rect);
 695          continue;
 696       }
 697    }
 698
 699    assert((fast_clear_buffers & rep_clear_buffers) == 0);
 700
 701    if (!(fast_clear_buffers | rep_clear_buffers)) {
 702       if (plain_clear_buffers)
 703          /* If we only have plain clears, skip the meta save/restore. */
 704          goto out;
 705       else
 706          /* Nothing left to do.  This happens when we hit the redundant fast
 707           * clear case above and nothing else.
 708           */
 709          return true;
 710    }
 711
 712    meta_save =
 713       MESA_META_ALPHA_TEST |
 714       MESA_META_BLEND |
 715       MESA_META_DEPTH_TEST |
 716       MESA_META_RASTERIZATION |
 717       MESA_META_SHADER |
 718       MESA_META_STENCIL_TEST |
 719       MESA_META_VERTEX |
 720       MESA_META_VIEWPORT |
 721       MESA_META_CLIP |
 722       MESA_META_CLAMP_FRAGMENT_COLOR |
 723       MESA_META_MULTISAMPLE |
 724       MESA_META_OCCLUSION_QUERY |
 725       MESA_META_DRAW_BUFFERS;
 726
 727    _mesa_meta_begin(ctx, meta_save);
 728
 729    if (!brw_fast_clear_init(brw)) {
 730       /* This is going to be hard to recover from, most likely out of memory.
 731        * Bail and let meta try and (probably) fail for us.
 732        */
 733       plain_clear_buffers = buffers;
 734       goto bail_to_meta;
 735    }
 736
 737    /* Clears never have the color clamped. */
 738    if (ctx->Extensions.ARB_color_buffer_float)
 739       _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
 740
 741    _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
 742    _mesa_DepthMask(GL_FALSE);
 743    _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);
 744
 745    use_rectlist(brw, true);
 746
 747    layers = MAX2(1, fb->MaxNumLayers);
 748
 749    if (brw->gen >= 9 && fast_clear_buffers) {
 750       fast_clear_attachments(brw, fb, fast_clear_buffers, fast_clear_rect);
 751    } else if (fast_clear_buffers) {
 752       _mesa_meta_drawbuffers_from_bitfield(fast_clear_buffers);
 753       brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
 754       set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
 755       brw_draw_rectlist(brw, &fast_clear_rect, layers);
 756       set_fast_clear_op(brw, 0);
 757
 758       /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
 759        * resolve them eventually.
 760        */
 761       for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
 762          struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
 763          struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 764          int index = fb->_ColorDrawBufferIndexes[buf];
 765
 766          if ((1 << index) & fast_clear_buffers)
 767             irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
 768       }
 769    }
 770
 771    if (rep_clear_buffers) {
 772       _mesa_meta_drawbuffers_from_bitfield(rep_clear_buffers);
 773       brw_bind_rep_write_shader(brw, ctx->Color.ClearColor.f);
 774       brw_draw_rectlist(brw, &clear_rect, layers);
 775    }
 776
 777  bail_to_meta:
 778    /* Dirty _NEW_BUFFERS so we reemit SURFACE_STATE which sets the fast clear
 779     * color before resolve and sets irb->mt->fast_clear_state to UNRESOLVED if
 780     * we render to it.
 781     */
 782    brw->NewGLState |= _NEW_BUFFERS;
 783
 784
 785    /* Set the custom state back to normal and dirty the same bits as above */
 786    use_rectlist(brw, false);
 787
 788    _mesa_meta_end(ctx);
 789
 790    /* From BSpec: Render Target Fast Clear:
 791     *
 792     *     After Render target fast clear, pipe-control with color cache
 793     *     write-flush must be issued before sending any DRAW commands on that
 794     *     render target.
 795     */
 796    brw_emit_mi_flush(brw);
 797
 798    /* If we had to fall back to plain clear for any buffers, clear those now
 799     * by calling into meta.
 800     */
 801  out:
 802    if (plain_clear_buffers)
 803       _mesa_meta_glsl_Clear(&brw->ctx, plain_clear_buffers);
 804
 805    return true;
 806 }
 807
 808 static void
 809 get_resolve_rect(struct brw_context *brw,
 810                  struct intel_mipmap_tree *mt, struct rect *rect)
 811 {
 812    unsigned x_align, y_align;
 813    unsigned x_scaledown, y_scaledown;
 814
 815    /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
 816     *
 817     *     A rectangle primitive must be scaled down by the following factors
 818     *     with respect to render target being resolved.
 819     *
 820     * The scaledown factors in the table that follows are related to the
 821     * alignment size returned by intel_get_non_msrt_mcs_alignment() by a
 822     * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
 823     * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
 824     * by a factor of 2.
 825     */
 826
 827    intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
 828    if (brw->gen >= 9) {
 829       x_scaledown = x_align * 8;
 830       y_scaledown = y_align * 8;
 831    } else if (brw->gen >= 8) {
 832       x_scaledown = x_align * 8;
 833       y_scaledown = y_align * 16;
 834    } else {
 835       x_scaledown = x_align / 2;
 836       y_scaledown = y_align / 2;
 837    }
 838    rect->x0 = rect->y0 = 0;
 839    rect->x1 = ALIGN(mt->logical_width0, x_scaledown) / x_scaledown;
 840    rect->y1 = ALIGN(mt->logical_height0, y_scaledown) / y_scaledown;
 841 }
 842
 843 void
 844 brw_meta_resolve_color(struct brw_context *brw,
 845                        struct intel_mipmap_tree *mt)
 846 {
 847    struct gl_context *ctx = &brw->ctx;
 848    GLuint fbo, rbo;
 849    struct rect rect;
 850
 851    brw_emit_mi_flush(brw);
 852
 853    _mesa_meta_begin(ctx, MESA_META_ALL);
 854
 855    _mesa_GenFramebuffers(1, &fbo);
 856    rbo = brw_get_rb_for_slice(brw, mt, 0, 0, false);
 857
 858    _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
 859    _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER,
 860                                  GL_COLOR_ATTACHMENT0,
 861                                  GL_RENDERBUFFER, rbo);
 862    _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0);
 863
 864    brw_fast_clear_init(brw);
 865
 866    use_rectlist(brw, true);
 867
 868    brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
 869
 870    /* SKL+ also has a resolve mode for compressed render targets and thus more
 871     * bits to let us select the type of resolve.  For fast clear resolves, it
 872     * turns out we can use the same value as pre-SKL though.
 873     */
 874    set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
 875
 876    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 877    get_resolve_rect(brw, mt, &rect);
 878
 879    brw_draw_rectlist(brw, &rect, 1);
 880
 881    set_fast_clear_op(brw, 0);
 882    use_rectlist(brw, false);
 883
 884    _mesa_DeleteRenderbuffers(1, &rbo);
 885    _mesa_DeleteFramebuffers(1, &fbo);
 886
 887    _mesa_meta_end(ctx);
 888
 889    /* We're typically called from intel_update_state() and we're supposed to
 890     * return with the state all updated to what it was before
 891     * brw_meta_resolve_color() was called.  The meta rendering will have
 892     * messed up the state and we need to call _mesa_update_state() again to
 893     * get back to where we were supposed to be when resolve was called.
 894     */
 895    if (ctx->NewState)
 896       _mesa_update_state(ctx);
 897 }