src/mesa/drivers/dri/i965/brw_meta_fast_clear.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "main/mtypes.h"
  25 #include "main/macros.h"
  26 #include "main/context.h"
  27 #include "main/objectlabel.h"
  28 #include "main/shaderapi.h"
  29 #include "main/arrayobj.h"
  30 #include "main/bufferobj.h"
  31 #include "main/buffers.h"
  32 #include "main/blend.h"
  33 #include "main/enable.h"
  34 #include "main/depth.h"
  35 #include "main/stencil.h"
  36 #include "main/varray.h"
  37 #include "main/uniforms.h"
  38 #include "main/fbobject.h"
  39 #include "main/texobj.h"
  40
  41 #include "main/api_validate.h"
  42 #include "main/state.h"
  43
  44 #include "util/format_srgb.h"
  45
  46 #include "vbo/vbo_context.h"
  47
  48 #include "drivers/common/meta.h"
  49
  50 #include "brw_defines.h"
  51 #include "brw_context.h"
  52 #include "brw_draw.h"
  53 #include "brw_state.h"
  54 #include "intel_fbo.h"
  55 #include "intel_batchbuffer.h"
  56
  57 #include "brw_blorp.h"
  58
  59 struct brw_fast_clear_state {
  60    struct gl_buffer_object *buf_obj;
  61    struct gl_vertex_array_object *array_obj;
  62    GLuint vao;
  63    GLuint shader_prog;
  64    GLint color_location;
  65 };
  66
  67 static bool
  68 brw_fast_clear_init(struct brw_context *brw)
  69 {
  70    struct brw_fast_clear_state *clear;
  71    struct gl_context *ctx = &brw->ctx;
  72
  73    if (brw->fast_clear_state) {
  74       clear = brw->fast_clear_state;
  75       _mesa_BindVertexArray(clear->vao);
  76       return true;
  77    }
  78
  79    brw->fast_clear_state = clear = malloc(sizeof *clear);
  80    if (clear == NULL)
  81       return false;
  82
  83    memset(clear, 0, sizeof *clear);
  84    _mesa_GenVertexArrays(1, &clear->vao);
  85    _mesa_BindVertexArray(clear->vao);
  86
  87    clear->buf_obj = ctx->Driver.NewBufferObject(ctx, 0xDEADBEEF);
  88    if (clear->buf_obj == NULL)
  89       return false;
  90
  91    clear->array_obj = _mesa_lookup_vao(ctx, clear->vao);
  92    assert(clear->array_obj != NULL);
  93
  94    _mesa_update_array_format(ctx, clear->array_obj, VERT_ATTRIB_GENERIC(0),
  95                              2, GL_FLOAT, GL_RGBA, GL_FALSE, GL_FALSE, GL_FALSE,
  96                              0, true);
  97    _mesa_bind_vertex_buffer(ctx, clear->array_obj, VERT_ATTRIB_GENERIC(0),
  98                             clear->buf_obj, 0, sizeof(float) * 2);
  99    _mesa_enable_vertex_array_attrib(ctx, clear->array_obj,
 100                                     VERT_ATTRIB_GENERIC(0));
 101
 102    return true;
 103 }
 104
 105 static void
 106 brw_bind_rep_write_shader(struct brw_context *brw, float *color)
 107 {
 108    const char *vs_source =
 109       "#extension GL_AMD_vertex_shader_layer : enable\n"
 110       "#extension GL_ARB_draw_instanced : enable\n"
 111       "attribute vec4 position;\n"
 112       "uniform int layer;\n"
 113       "void main()\n"
 114       "{\n"
 115       "#ifdef GL_AMD_vertex_shader_layer\n"
 116       "   gl_Layer = gl_InstanceID;\n"
 117       "#endif\n"
 118       "   gl_Position = position;\n"
 119       "}\n";
 120    const char *fs_source =
 121       "uniform vec4 color;\n"
 122       "void main()\n"
 123       "{\n"
 124       "   gl_FragColor = color;\n"
 125       "}\n";
 126
 127    GLuint vs, fs;
 128    struct brw_fast_clear_state *clear = brw->fast_clear_state;
 129    struct gl_context *ctx = &brw->ctx;
 130
 131    if (clear->shader_prog) {
 132       _mesa_UseProgram(clear->shader_prog);
 133       _mesa_Uniform4fv(clear->color_location, 1, color);
 134       return;
 135    }
 136
 137    vs = _mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER, vs_source);
 138    fs = _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER, fs_source);
 139
 140    clear->shader_prog = _mesa_CreateProgram();
 141    _mesa_AttachShader(clear->shader_prog, fs);
 142    _mesa_DeleteShader(fs);
 143    _mesa_AttachShader(clear->shader_prog, vs);
 144    _mesa_DeleteShader(vs);
 145    _mesa_BindAttribLocation(clear->shader_prog, 0, "position");
 146    _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta repclear");
 147    _mesa_LinkProgram(clear->shader_prog);
 148
 149    clear->color_location =
 150       _mesa_GetUniformLocation(clear->shader_prog, "color");
 151
 152    _mesa_UseProgram(clear->shader_prog);
 153    _mesa_Uniform4fv(clear->color_location, 1, color);
 154 }
 155
 156 void
 157 brw_meta_fast_clear_free(struct brw_context *brw)
 158 {
 159    struct brw_fast_clear_state *clear = brw->fast_clear_state;
 160    GET_CURRENT_CONTEXT(old_context);
 161
 162    if (clear == NULL)
 163       return;
 164
 165    _mesa_make_current(&brw->ctx, NULL, NULL);
 166
 167    _mesa_DeleteVertexArrays(1, &clear->vao);
 168    _mesa_reference_buffer_object(&brw->ctx, &clear->buf_obj, NULL);
 169    _mesa_DeleteProgram(clear->shader_prog);
 170    free(clear);
 171
 172    if (old_context)
 173       _mesa_make_current(old_context, old_context->WinSysDrawBuffer, old_context->WinSysReadBuffer);
 174    else
 175       _mesa_make_current(NULL, NULL, NULL);
 176 }
 177
 178 struct rect {
 179    int x0, y0, x1, y1;
 180 };
 181
 182 static void
 183 brw_draw_rectlist(struct brw_context *brw, struct rect *rect, int num_instances)
 184 {
 185    struct gl_context *ctx = &brw->ctx;
 186    struct brw_fast_clear_state *clear = brw->fast_clear_state;
 187    int start = 0, count = 3;
 188    struct _mesa_prim prim;
 189    float verts[6];
 190
 191    verts[0] = rect->x1;
 192    verts[1] = rect->y1;
 193    verts[2] = rect->x0;
 194    verts[3] = rect->y1;
 195    verts[4] = rect->x0;
 196    verts[5] = rect->y0;
 197
 198    /* upload new vertex data */
 199    _mesa_buffer_data(ctx, clear->buf_obj, GL_NONE, sizeof(verts), verts,
 200                      GL_DYNAMIC_DRAW, __func__);
 201
 202    if (ctx->NewState)
 203       _mesa_update_state(ctx);
 204
 205    vbo_bind_arrays(ctx);
 206
 207    memset(&prim, 0, sizeof prim);
 208    prim.begin = 1;
 209    prim.end = 1;
 210    prim.mode = BRW_PRIM_OFFSET + _3DPRIM_RECTLIST;
 211    prim.num_instances = num_instances;
 212    prim.start = start;
 213    prim.count = count;
 214
 215    /* Make sure our internal prim value doesn't clash with a valid GL value. */
 216    assert(!_mesa_is_valid_prim_mode(ctx, prim.mode));
 217
 218    brw_draw_prims(ctx, &prim, 1, NULL,
 219                   GL_TRUE, start, start + count - 1,
 220                   NULL, 0, NULL);
 221 }
 222
 223 static void
 224 get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
 225                     struct intel_renderbuffer *irb, struct rect *rect)
 226 {
 227    unsigned int x_align, y_align;
 228    unsigned int x_scaledown, y_scaledown;
 229
 230    if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) {
 231       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 232        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
 233        *
 234        *     Clear pass must have a clear rectangle that must follow
 235        *     alignment rules in terms of pixels and lines as shown in the
 236        *     table below. Further, the clear-rectangle height and width
 237        *     must be multiple of the following dimensions. If the height
 238        *     and width of the render target being cleared do not meet these
 239        *     requirements, an MCS buffer can be created such that it
 240        *     follows the requirement and covers the RT.
 241        *
 242        * The alignment size in the table that follows is related to the
 243        * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
 244        * with X alignment multiplied by 16 and Y alignment multiplied by 32.
 245        */
 246       intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
 247       x_align *= 16;
 248
 249       /* SKL+ line alignment requirement for Y-tiled are half those of the prior
 250        * generations.
 251        */
 252       if (brw->gen >= 9)
 253          y_align *= 16;
 254       else
 255          y_align *= 32;
 256
 257       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 258        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
 259        *
 260        *     In order to optimize the performance MCS buffer (when bound to
 261        *     1X RT) clear similarly to MCS buffer clear for MSRT case,
 262        *     clear rect is required to be scaled by the following factors
 263        *     in the horizontal and vertical directions:
 264        *
 265        * The X and Y scale down factors in the table that follows are each
 266        * equal to half the alignment value computed above.
 267        */
 268       x_scaledown = x_align / 2;
 269       y_scaledown = y_align / 2;
 270
 271       /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
 272        * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color
 273        * Clear of Non-MultiSampled Render Target Restrictions":
 274        *
 275        *   Clear rectangle must be aligned to two times the number of
 276        *   pixels in the table shown below due to 16x16 hashing across the
 277        *   slice.
 278        */
 279       x_align *= 2;
 280       y_align *= 2;
 281    } else {
 282       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
 283        * Target(s)", beneath the "MSAA Compression" bullet (p326):
 284        *
 285        *     Clear pass for this case requires that scaled down primitive
 286        *     is sent down with upper left co-ordinate to coincide with
 287        *     actual rectangle being cleared. For MSAA, clear rectangle’s
 288        *     height and width need to as show in the following table in
 289        *     terms of (width,height) of the RT.
 290        *
 291        *     MSAA  Width of Clear Rect  Height of Clear Rect
 292        *      2X     Ceil(1/8*width)      Ceil(1/2*height)
 293        *      4X     Ceil(1/8*width)      Ceil(1/2*height)
 294        *      8X     Ceil(1/2*width)      Ceil(1/2*height)
 295        *     16X         width            Ceil(1/2*height)
 296        *
 297        * The text "with upper left co-ordinate to coincide with actual
 298        * rectangle being cleared" is a little confusing--it seems to imply
 299        * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to
 300        * feed the pipeline using the rectangle (x,y) to
 301        * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on
 302        * the number of samples.  Experiments indicate that this is not
 303        * quite correct; actually, what the hardware appears to do is to
 304        * align whatever rectangle is sent down the pipeline to the nearest
 305        * multiple of 2x2 blocks, and then scale it up by a factor of N
 306        * horizontally and 2 vertically.  So the resulting alignment is 4
 307        * vertically and either 4 or 16 horizontally, and the scaledown
 308        * factor is 2 vertically and either 2 or 8 horizontally.
 309        */
 310       switch (irb->mt->num_samples) {
 311       case 2:
 312       case 4:
 313          x_scaledown = 8;
 314          break;
 315       case 8:
 316          x_scaledown = 2;
 317          break;
 318       case 16:
 319          x_scaledown = 1;
 320          break;
 321       default:
 322          unreachable("Unexpected sample count for fast clear");
 323       }
 324       y_scaledown = 2;
 325       x_align = x_scaledown * 2;
 326       y_align = y_scaledown * 2;
 327    }
 328
 329    rect->x0 = fb->_Xmin;
 330    rect->x1 = fb->_Xmax;
 331    if (fb->Name != 0) {
 332       rect->y0 = fb->_Ymin;
 333       rect->y1 = fb->_Ymax;
 334    } else {
 335       rect->y0 = fb->Height - fb->_Ymax;
 336       rect->y1 = fb->Height - fb->_Ymin;
 337    }
 338
 339    rect->x0 = ROUND_DOWN_TO(rect->x0,  x_align) / x_scaledown;
 340    rect->y0 = ROUND_DOWN_TO(rect->y0, y_align) / y_scaledown;
 341    rect->x1 = ALIGN(rect->x1, x_align) / x_scaledown;
 342    rect->y1 = ALIGN(rect->y1, y_align) / y_scaledown;
 343 }
 344
 345 static void
 346 get_buffer_rect(const struct gl_framebuffer *fb, struct rect *rect)
 347 {
 348    rect->x0 = fb->_Xmin;
 349    rect->x1 = fb->_Xmax;
 350    if (fb->Name != 0) {
 351       rect->y0 = fb->_Ymin;
 352       rect->y1 = fb->_Ymax;
 353    } else {
 354       rect->y0 = fb->Height - fb->_Ymax;
 355       rect->y1 = fb->Height - fb->_Ymin;
 356    }
 357 }
 358
 359 /**
 360  * Determine if fast color clear supports the given clear color.
 361  *
 362  * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
 363  * moment we only support floating point, unorm, and snorm buffers.
 364  */
 365 static bool
 366 is_color_fast_clear_compatible(struct brw_context *brw,
 367                                mesa_format format,
 368                                const union gl_color_union *color)
 369 {
 370    if (_mesa_is_format_integer_color(format)) {
 371       if (brw->gen >= 8) {
 372          perf_debug("Integer fast clear not enabled for (%s)",
 373                     _mesa_get_format_name(format));
 374       }
 375       return false;
 376    }
 377
 378    for (int i = 0; i < 4; i++) {
 379       if (!_mesa_format_has_color_component(format, i)) {
 380          continue;
 381       }
 382
 383       if (brw->gen < 9 &&
 384           color->f[i] != 0.0f && color->f[i] != 1.0f) {
 385          return false;
 386       }
 387    }
 388    return true;
 389 }
 390
 391 /**
 392  * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
 393  * SURFACE_STATE (DWORD 12-15 on SKL+).
 394  */
 395 static void
 396 set_fast_clear_color(struct brw_context *brw,
 397                      struct intel_mipmap_tree *mt,
 398                      const union gl_color_union *color)
 399 {
 400    union gl_color_union override_color = *color;
 401
 402    /* The sampler doesn't look at the format of the surface when the fast
 403     * clear color is used so we need to implement luminance, intensity and
 404     * missing components manually.
 405     */
 406    switch (_mesa_get_format_base_format(mt->format)) {
 407    case GL_INTENSITY:
 408       override_color.ui[3] = override_color.ui[0];
 409       /* flow through */
 410    case GL_LUMINANCE:
 411    case GL_LUMINANCE_ALPHA:
 412       override_color.ui[1] = override_color.ui[0];
 413       override_color.ui[2] = override_color.ui[0];
 414       break;
 415    default:
 416       for (int i = 0; i < 3; i++) {
 417          if (!_mesa_format_has_color_component(mt->format, i))
 418             override_color.ui[i] = 0;
 419       }
 420       break;
 421    }
 422
 423    if (!_mesa_format_has_color_component(mt->format, 3)) {
 424       if (_mesa_is_format_integer_color(mt->format))
 425          override_color.ui[3] = 1;
 426       else
 427          override_color.f[3] = 1.0f;
 428    }
 429
 430    /* Handle linear→SRGB conversion */
 431    if (brw->ctx.Color.sRGBEnabled &&
 432        _mesa_get_srgb_format_linear(mt->format) != mt->format) {
 433       for (int i = 0; i < 3; i++) {
 434          override_color.f[i] =
 435             util_format_linear_to_srgb_float(override_color.f[i]);
 436       }
 437    }
 438
 439    if (brw->gen >= 9) {
 440       mt->gen9_fast_clear_color = override_color;
 441    } else {
 442       mt->fast_clear_color_value = 0;
 443       for (int i = 0; i < 4; i++) {
 444          /* Testing for non-0 works for integer and float colors */
 445          if (override_color.f[i] != 0.0f) {
 446              mt->fast_clear_color_value |=
 447                 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
 448          }
 449       }
 450    }
 451 }
 452
 453 static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 };
 454
 455 static void
 456 set_fast_clear_op(struct brw_context *brw, uint32_t op)
 457 {
 458    /* Set op and dirty BRW_NEW_FRAGMENT_PROGRAM to make sure we re-emit
 459     * 3DSTATE_PS.
 460     */
 461    brw->wm.fast_clear_op = op;
 462    brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 463 }
 464
 465 static void
 466 use_rectlist(struct brw_context *brw, bool enable)
 467 {
 468    /* Set custom state to let us use _3DPRIM_RECTLIST and the replicated
 469     * rendertarget write.  When we enable reclist mode, we disable the
 470     * viewport transform, disable clipping, enable the rep16 write
 471     * optimization and disable simd8 dispatch in the PS.
 472     */
 473    brw->sf.viewport_transform_enable = !enable;
 474    brw->use_rep_send = enable;
 475    brw->no_simd8 = enable;
 476
 477    /* Dirty state to make sure we reemit the state packages affected by the
 478     * custom state.  We dirty BRW_NEW_FRAGMENT_PROGRAM to emit 3DSTATE_PS for
 479     * disabling simd8 dispatch, _NEW_LIGHT to emit 3DSTATE_SF for disabling
 480     * the viewport transform and 3DSTATE_CLIP to disable clipping for the
 481     * reclist primitive.  This is a little messy - it would be nicer to
 482     * BRW_NEW_FAST_CLEAR flag or so, but we're out of brw state bits.  Dirty
 483     * _NEW_BUFFERS to make sure we emit new SURFACE_STATE with the new fast
 484     * clear color value.
 485     */
 486    brw->NewGLState |= _NEW_LIGHT | _NEW_BUFFERS;
 487    brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 488 }
 489
 490 /**
 491  * Individually fast clear each color buffer attachment. On previous gens this
 492  * isn't required. The motivation for this comes from one line (which seems to
 493  * be specific to SKL+). The list item is in section titled _MCS Buffer for
 494  * Render Target(s)_
 495  *
 496  *   "Since only one RT is bound with a clear pass, only one RT can be cleared
 497  *   at a time. To clear multiple RTs, multiple clear passes are required."
 498  *
 499  * The code follows the same idea as the resolve code which creates a fake FBO
 500  * to avoid interfering with too much of the GL state.
 501  */
 502 static void
 503 fast_clear_attachments(struct brw_context *brw,
 504                        struct gl_framebuffer *fb,
 505                        uint32_t fast_clear_buffers,
 506                        struct rect fast_clear_rect)
 507 {
 508    assert(brw->gen >= 9);
 509
 510    brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
 511
 512    /* SKL+ also has a resolve mode for compressed render targets and thus more
 513     * bits to let us select the type of resolve.  For fast clear resolves, it
 514     * turns out we can use the same value as pre-SKL though.
 515     */
 516    set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
 517
 518    while (fast_clear_buffers) {
 519       int index = ffs(fast_clear_buffers) - 1;
 520
 521       fast_clear_buffers &= ~(1 << index);
 522
 523       _mesa_meta_drawbuffers_from_bitfield(1 << index);
 524
 525       brw_draw_rectlist(brw, &fast_clear_rect, MAX2(1, fb->MaxNumLayers));
 526
 527       /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
 528        * resolve them eventually.
 529        */
 530       struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[0];
 531       struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 532       irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
 533    }
 534
 535    set_fast_clear_op(brw, 0);
 536 }
 537
 538 bool
 539 brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
 540                     GLbitfield buffers, bool partial_clear)
 541 {
 542    struct gl_context *ctx = &brw->ctx;
 543    mesa_format format;
 544    enum { FAST_CLEAR, REP_CLEAR, PLAIN_CLEAR } clear_type;
 545    GLbitfield plain_clear_buffers, meta_save, rep_clear_buffers, fast_clear_buffers;
 546    struct rect fast_clear_rect, clear_rect;
 547    int layers;
 548
 549    fast_clear_buffers = rep_clear_buffers = plain_clear_buffers = 0;
 550
 551    /* First we loop through the color draw buffers and determine which ones
 552     * can be fast cleared, which ones can use the replicated write and which
 553     * ones have to fall back to regular color clear.
 554     */
 555    for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
 556       struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
 557       struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 558       int index = fb->_ColorDrawBufferIndexes[buf];
 559
 560       /* Only clear the buffers present in the provided mask */
 561       if (((1 << index) & buffers) == 0)
 562          continue;
 563
 564       /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
 565        * the framebuffer can be complete with some attachments missing.  In
 566        * this case the _ColorDrawBuffers pointer will be NULL.
 567        */
 568       if (rb == NULL)
 569          continue;
 570
 571       clear_type = FAST_CLEAR;
 572
 573       /* We don't have fast clear until gen7. */
 574       if (brw->gen < 7)
 575          clear_type = REP_CLEAR;
 576
 577       /* If we're mapping the render format to a different format than the
 578        * format we use for texturing then it is a bit questionable whether it
 579        * should be possible to use a fast clear. Although we only actually
 580        * render using a renderable format, without the override workaround it
 581        * wouldn't be possible to have a non-renderable surface in a fast clear
 582        * state so the hardware probably legitimately doesn't need to support
 583        * this case. At least on Gen9 this really does seem to cause problems.
 584        */
 585       if (brw->gen >= 9 &&
 586           brw_format_for_mesa_format(irb->mt->format) !=
 587           brw->render_target_format[irb->mt->format])
 588          clear_type = REP_CLEAR;
 589
 590       if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
 591          clear_type = REP_CLEAR;
 592
 593       /* We can't do scissored fast clears because of the restrictions on the
 594        * fast clear rectangle size.
 595        */
 596       if (partial_clear)
 597          clear_type = REP_CLEAR;
 598
 599       /* Fast clear is only supported for colors where all components are
 600        * either 0 or 1.
 601        */
 602       format = _mesa_get_render_format(ctx, irb->mt->format);
 603       if (!is_color_fast_clear_compatible(brw, format, &ctx->Color.ClearColor))
 604          clear_type = REP_CLEAR;
 605
 606       /* From the SNB PRM (Vol4_Part1):
 607        *
 608        *     "Replicated data (Message Type = 111) is only supported when
 609        *      accessing tiled memory.  Using this Message Type to access
 610        *      linear (untiled) memory is UNDEFINED."
 611        */
 612       if (irb->mt->tiling == I915_TILING_NONE) {
 613          perf_debug("Falling back to plain clear because %dx%d buffer is untiled\n",
 614                     irb->mt->logical_width0, irb->mt->logical_height0);
 615          clear_type = PLAIN_CLEAR;
 616       }
 617
 618       /* Constant color writes ignore everything in blend and color calculator
 619        * state.  This is not documented.
 620        */
 621       GLubyte *color_mask = ctx->Color.ColorMask[buf];
 622       for (int i = 0; i < 4; i++) {
 623          if (_mesa_format_has_color_component(irb->mt->format, i) &&
 624              !color_mask[i]) {
 625             perf_debug("Falling back to plain clear on %dx%d buffer because of color mask\n",
 626                        irb->mt->logical_width0, irb->mt->logical_height0);
 627             clear_type = PLAIN_CLEAR;
 628          }
 629       }
 630
 631       /* Allocate the MCS for non MSRT surfaces now if we're doing a fast
 632        * clear and we don't have the MCS yet.  On failure, fall back to
 633        * replicated clear.
 634        */
 635       if (clear_type == FAST_CLEAR && irb->mt->mcs_mt == NULL)
 636          if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt))
 637             clear_type = REP_CLEAR;
 638
 639       switch (clear_type) {
 640       case FAST_CLEAR:
 641          set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
 642          irb->need_downsample = true;
 643
 644          /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
 645           * clear is redundant and can be skipped.  Only skip after we've
 646           * updated the fast clear color above though.
 647           */
 648          if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR)
 649             continue;
 650
 651          /* Set fast_clear_state to RESOLVED so we don't try resolve them when
 652           * we draw, in case the mt is also bound as a texture.
 653           */
 654          irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 655          irb->need_downsample = true;
 656          fast_clear_buffers |= 1 << index;
 657          get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
 658          break;
 659
 660       case REP_CLEAR:
 661          rep_clear_buffers |= 1 << index;
 662          get_buffer_rect(fb, &clear_rect);
 663          break;
 664
 665       case PLAIN_CLEAR:
 666          plain_clear_buffers |= 1 << index;
 667          get_buffer_rect(fb, &clear_rect);
 668          continue;
 669       }
 670    }
 671
 672    assert((fast_clear_buffers & rep_clear_buffers) == 0);
 673
 674    if (!(fast_clear_buffers | rep_clear_buffers)) {
 675       if (plain_clear_buffers)
 676          /* If we only have plain clears, skip the meta save/restore. */
 677          goto out;
 678       else
 679          /* Nothing left to do.  This happens when we hit the redundant fast
 680           * clear case above and nothing else.
 681           */
 682          return true;
 683    }
 684
 685    meta_save =
 686       MESA_META_ALPHA_TEST |
 687       MESA_META_BLEND |
 688       MESA_META_DEPTH_TEST |
 689       MESA_META_RASTERIZATION |
 690       MESA_META_SHADER |
 691       MESA_META_STENCIL_TEST |
 692       MESA_META_VERTEX |
 693       MESA_META_VIEWPORT |
 694       MESA_META_CLIP |
 695       MESA_META_CLAMP_FRAGMENT_COLOR |
 696       MESA_META_MULTISAMPLE |
 697       MESA_META_OCCLUSION_QUERY |
 698       MESA_META_DRAW_BUFFERS;
 699
 700    _mesa_meta_begin(ctx, meta_save);
 701
 702    if (!brw_fast_clear_init(brw)) {
 703       /* This is going to be hard to recover from, most likely out of memory.
 704        * Bail and let meta try and (probably) fail for us.
 705        */
 706       plain_clear_buffers = buffers;
 707       goto bail_to_meta;
 708    }
 709
 710    /* Clears never have the color clamped. */
 711    if (ctx->Extensions.ARB_color_buffer_float)
 712       _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
 713
 714    _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
 715    _mesa_DepthMask(GL_FALSE);
 716    _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);
 717
 718    use_rectlist(brw, true);
 719
 720    layers = MAX2(1, fb->MaxNumLayers);
 721
 722    if (brw->gen >= 9 && fast_clear_buffers) {
 723       fast_clear_attachments(brw, fb, fast_clear_buffers, fast_clear_rect);
 724    } else if (fast_clear_buffers) {
 725       _mesa_meta_drawbuffers_from_bitfield(fast_clear_buffers);
 726       brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
 727       set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
 728       brw_draw_rectlist(brw, &fast_clear_rect, layers);
 729       set_fast_clear_op(brw, 0);
 730
 731       /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
 732        * resolve them eventually.
 733        */
 734       for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
 735          struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
 736          struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 737          int index = fb->_ColorDrawBufferIndexes[buf];
 738
 739          if ((1 << index) & fast_clear_buffers)
 740             irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
 741       }
 742    }
 743
 744    if (rep_clear_buffers) {
 745       _mesa_meta_drawbuffers_from_bitfield(rep_clear_buffers);
 746       brw_bind_rep_write_shader(brw, ctx->Color.ClearColor.f);
 747       brw_draw_rectlist(brw, &clear_rect, layers);
 748    }
 749
 750  bail_to_meta:
 751    /* Dirty _NEW_BUFFERS so we reemit SURFACE_STATE which sets the fast clear
 752     * color before resolve and sets irb->mt->fast_clear_state to UNRESOLVED if
 753     * we render to it.
 754     */
 755    brw->NewGLState |= _NEW_BUFFERS;
 756
 757
 758    /* Set the custom state back to normal and dirty the same bits as above */
 759    use_rectlist(brw, false);
 760
 761    _mesa_meta_end(ctx);
 762
 763    /* From BSpec: Render Target Fast Clear:
 764     *
 765     *     After Render target fast clear, pipe-control with color cache
 766     *     write-flush must be issued before sending any DRAW commands on that
 767     *     render target.
 768     */
 769    brw_emit_mi_flush(brw);
 770
 771    /* If we had to fall back to plain clear for any buffers, clear those now
 772     * by calling into meta.
 773     */
 774  out:
 775    if (plain_clear_buffers)
 776       _mesa_meta_glsl_Clear(&brw->ctx, plain_clear_buffers);
 777
 778    return true;
 779 }
 780
 781 static void
 782 get_resolve_rect(struct brw_context *brw,
 783                  struct intel_mipmap_tree *mt, struct rect *rect)
 784 {
 785    unsigned x_align, y_align;
 786    unsigned x_scaledown, y_scaledown;
 787
 788    /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
 789     *
 790     *     A rectangle primitive must be scaled down by the following factors
 791     *     with respect to render target being resolved.
 792     *
 793     * The scaledown factors in the table that follows are related to the
 794     * alignment size returned by intel_get_non_msrt_mcs_alignment() by a
 795     * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
 796     * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
 797     * by a factor of 2.
 798     */
 799
 800    intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
 801    if (brw->gen >= 9) {
 802       x_scaledown = x_align * 8;
 803       y_scaledown = y_align * 8;
 804    } else if (brw->gen >= 8) {
 805       x_scaledown = x_align * 8;
 806       y_scaledown = y_align * 16;
 807    } else {
 808       x_scaledown = x_align / 2;
 809       y_scaledown = y_align / 2;
 810    }
 811    rect->x0 = rect->y0 = 0;
 812    rect->x1 = ALIGN(mt->logical_width0, x_scaledown) / x_scaledown;
 813    rect->y1 = ALIGN(mt->logical_height0, y_scaledown) / y_scaledown;
 814 }
 815
 816 void
 817 brw_meta_resolve_color(struct brw_context *brw,
 818                        struct intel_mipmap_tree *mt)
 819 {
 820    struct gl_context *ctx = &brw->ctx;
 821    GLuint fbo, rbo;
 822    struct rect rect;
 823
 824    brw_emit_mi_flush(brw);
 825
 826    _mesa_meta_begin(ctx, MESA_META_ALL);
 827
 828    _mesa_GenFramebuffers(1, &fbo);
 829    rbo = brw_get_rb_for_slice(brw, mt, 0, 0, false);
 830
 831    _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
 832    _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER,
 833                                  GL_COLOR_ATTACHMENT0,
 834                                  GL_RENDERBUFFER, rbo);
 835    _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0);
 836
 837    brw_fast_clear_init(brw);
 838
 839    use_rectlist(brw, true);
 840
 841    brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
 842
 843    /* SKL+ also has a resolve mode for compressed render targets and thus more
 844     * bits to let us select the type of resolve.  For fast clear resolves, it
 845     * turns out we can use the same value as pre-SKL though.
 846     */
 847    set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
 848
 849    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 850    get_resolve_rect(brw, mt, &rect);
 851
 852    brw_draw_rectlist(brw, &rect, 1);
 853
 854    set_fast_clear_op(brw, 0);
 855    use_rectlist(brw, false);
 856
 857    _mesa_DeleteRenderbuffers(1, &rbo);
 858    _mesa_DeleteFramebuffers(1, &fbo);
 859
 860    _mesa_meta_end(ctx);
 861
 862    /* We're typically called from intel_update_state() and we're supposed to
 863     * return with the state all updated to what it was before
 864     * brw_meta_resolve_color() was called.  The meta rendering will have
 865     * messed up the state and we need to call _mesa_update_state() again to
 866     * get back to where we were supposed to be when resolve was called.
 867     */
 868    if (ctx->NewState)
 869       _mesa_update_state(ctx);
 870 }