src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       unreachable("not reached");
  77    }
  78 }
  79
  80 uint32_t
  81 brw_get_surface_tiling_bits(uint32_t tiling)
  82 {
  83    switch (tiling) {
  84    case I915_TILING_X:
  85       return BRW_SURFACE_TILED;
  86    case I915_TILING_Y:
  87       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  88    default:
  89       return 0;
  90    }
  91 }
  92
  93
  94 uint32_t
  95 brw_get_surface_num_multisamples(unsigned num_samples)
  96 {
  97    if (num_samples > 1)
  98       return BRW_SURFACE_MULTISAMPLECOUNT_4;
  99    else
 100       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 101 }
 102
 103 void
 104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
 105                       bool is_render_target,
 106                       unsigned *width, unsigned *height,
 107                       unsigned *pitch, uint32_t *tiling, unsigned *format)
 108 {
 109    static const unsigned halign_stencil = 8;
 110
 111    /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
 112     * there are half as many rows.
 113     * In addition, mip-levels are accessed manually by the program and
 114     * therefore the surface is setup to cover all the mip-levels for one slice.
 115     * (Hardware is still used to access individual slices).
 116     */
 117    *tiling = I915_TILING_Y;
 118    *pitch = mt->pitch * 2;
 119    *width = ALIGN(mt->total_width, halign_stencil) * 2;
 120    *height = (mt->total_height / mt->physical_depth0) / 2;
 121
 122    if (is_render_target) {
 123       *format = BRW_SURFACEFORMAT_R8_UINT;
 124    }
 125 }
 126
 127
 128 /**
 129  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 130  * swizzling.
 131  */
 132 int
 133 brw_get_texture_swizzle(const struct gl_context *ctx,
 134                         const struct gl_texture_object *t)
 135 {
 136    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 137
 138    int swizzles[SWIZZLE_NIL + 1] = {
 139       SWIZZLE_X,
 140       SWIZZLE_Y,
 141       SWIZZLE_Z,
 142       SWIZZLE_W,
 143       SWIZZLE_ZERO,
 144       SWIZZLE_ONE,
 145       SWIZZLE_NIL
 146    };
 147
 148    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 149        img->_BaseFormat == GL_DEPTH_STENCIL) {
 150       GLenum depth_mode = t->DepthMode;
 151
 152       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 153        * with depth component data specified with a sized internal format.
 154        * Otherwise, it's left at the old default, GL_LUMINANCE.
 155        */
 156       if (_mesa_is_gles3(ctx) &&
 157           img->InternalFormat != GL_DEPTH_COMPONENT &&
 158           img->InternalFormat != GL_DEPTH_STENCIL) {
 159          depth_mode = GL_RED;
 160       }
 161
 162       switch (depth_mode) {
 163       case GL_ALPHA:
 164          swizzles[0] = SWIZZLE_ZERO;
 165          swizzles[1] = SWIZZLE_ZERO;
 166          swizzles[2] = SWIZZLE_ZERO;
 167          swizzles[3] = SWIZZLE_X;
 168          break;
 169       case GL_LUMINANCE:
 170          swizzles[0] = SWIZZLE_X;
 171          swizzles[1] = SWIZZLE_X;
 172          swizzles[2] = SWIZZLE_X;
 173          swizzles[3] = SWIZZLE_ONE;
 174          break;
 175       case GL_INTENSITY:
 176          swizzles[0] = SWIZZLE_X;
 177          swizzles[1] = SWIZZLE_X;
 178          swizzles[2] = SWIZZLE_X;
 179          swizzles[3] = SWIZZLE_X;
 180          break;
 181       case GL_RED:
 182          swizzles[0] = SWIZZLE_X;
 183          swizzles[1] = SWIZZLE_ZERO;
 184          swizzles[2] = SWIZZLE_ZERO;
 185          swizzles[3] = SWIZZLE_ONE;
 186          break;
 187       }
 188    }
 189
 190    GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
 191
 192    /* If the texture's format is alpha-only, force R, G, and B to
 193     * 0.0. Similarly, if the texture's format has no alpha channel,
 194     * force the alpha value read to 1.0. This allows for the
 195     * implementation to use an RGBA texture for any of these formats
 196     * without leaking any unexpected values.
 197     */
 198    switch (img->_BaseFormat) {
 199    case GL_ALPHA:
 200       swizzles[0] = SWIZZLE_ZERO;
 201       swizzles[1] = SWIZZLE_ZERO;
 202       swizzles[2] = SWIZZLE_ZERO;
 203       break;
 204    case GL_LUMINANCE:
 205       if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
 206          swizzles[0] = SWIZZLE_X;
 207          swizzles[1] = SWIZZLE_X;
 208          swizzles[2] = SWIZZLE_X;
 209          swizzles[3] = SWIZZLE_ONE;
 210       }
 211       break;
 212    case GL_LUMINANCE_ALPHA:
 213       if (datatype == GL_SIGNED_NORMALIZED) {
 214          swizzles[0] = SWIZZLE_X;
 215          swizzles[1] = SWIZZLE_X;
 216          swizzles[2] = SWIZZLE_X;
 217          swizzles[3] = SWIZZLE_W;
 218       }
 219       break;
 220    case GL_INTENSITY:
 221       if (datatype == GL_SIGNED_NORMALIZED) {
 222          swizzles[0] = SWIZZLE_X;
 223          swizzles[1] = SWIZZLE_X;
 224          swizzles[2] = SWIZZLE_X;
 225          swizzles[3] = SWIZZLE_X;
 226       }
 227       break;
 228    case GL_RED:
 229    case GL_RG:
 230    case GL_RGB:
 231       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 232          swizzles[3] = SWIZZLE_ONE;
 233       break;
 234    }
 235
 236    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 237                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 238                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 239                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 240 }
 241
 242 static void
 243 gen4_emit_buffer_surface_state(struct brw_context *brw,
 244                                uint32_t *out_offset,
 245                                drm_intel_bo *bo,
 246                                unsigned buffer_offset,
 247                                unsigned surface_format,
 248                                unsigned buffer_size,
 249                                unsigned pitch,
 250                                bool rw)
 251 {
 252    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 253                                     6 * 4, 32, out_offset);
 254    memset(surf, 0, 6 * 4);
 255
 256    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 257              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 258              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 259    surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
 260    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 261              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 262    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 263              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 264
 265    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 266     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 267     * physical cache.  It is mapped in hardware to the sampler cache."
 268     */
 269    if (bo) {
 270       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 271                               bo, buffer_offset,
 272                               I915_GEM_DOMAIN_SAMPLER,
 273                               (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
 274    }
 275 }
 276
 277 void
 278 brw_update_buffer_texture_surface(struct gl_context *ctx,
 279                                   unsigned unit,
 280                                   uint32_t *surf_offset)
 281 {
 282    struct brw_context *brw = brw_context(ctx);
 283    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 284    struct intel_buffer_object *intel_obj =
 285       intel_buffer_object(tObj->BufferObject);
 286    uint32_t size = tObj->BufferSize;
 287    drm_intel_bo *bo = NULL;
 288    mesa_format format = tObj->_BufferObjectFormat;
 289    uint32_t brw_format = brw_format_for_mesa_format(format);
 290    int texel_size = _mesa_get_format_bytes(format);
 291
 292    if (intel_obj) {
 293       size = MIN2(size, intel_obj->Base.Size);
 294       bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
 295    }
 296
 297    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 298       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 299                     _mesa_get_format_name(format));
 300    }
 301
 302    brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
 303                                        tObj->BufferOffset,
 304                                        brw_format,
 305                                        size / texel_size,
 306                                        texel_size,
 307                                        false /* rw */);
 308 }
 309
 310 static void
 311 brw_update_texture_surface(struct gl_context *ctx,
 312                            unsigned unit,
 313                            uint32_t *surf_offset,
 314                            bool for_gather)
 315 {
 316    struct brw_context *brw = brw_context(ctx);
 317    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 318    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 319    struct intel_mipmap_tree *mt = intelObj->mt;
 320    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 321    uint32_t *surf;
 322
 323    /* BRW_NEW_TEXTURE_BUFFER */
 324    if (tObj->Target == GL_TEXTURE_BUFFER) {
 325       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 326       return;
 327    }
 328
 329    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 330                           6 * 4, 32, surf_offset);
 331
 332    uint32_t tex_format = translate_tex_format(brw, mt->format,
 333                                               sampler->sRGBDecode);
 334
 335    if (for_gather) {
 336       /* Sandybridge's gather4 message is broken for integer formats.
 337        * To work around this, we pretend the surface is UNORM for
 338        * 8 or 16-bit formats, and emit shader instructions to recover
 339        * the real INT/UINT value.  For 32-bit formats, we pretend
 340        * the surface is FLOAT, and simply reinterpret the resulting
 341        * bits.
 342        */
 343       switch (tex_format) {
 344       case BRW_SURFACEFORMAT_R8_SINT:
 345       case BRW_SURFACEFORMAT_R8_UINT:
 346          tex_format = BRW_SURFACEFORMAT_R8_UNORM;
 347          break;
 348
 349       case BRW_SURFACEFORMAT_R16_SINT:
 350       case BRW_SURFACEFORMAT_R16_UINT:
 351          tex_format = BRW_SURFACEFORMAT_R16_UNORM;
 352          break;
 353
 354       case BRW_SURFACEFORMAT_R32_SINT:
 355       case BRW_SURFACEFORMAT_R32_UINT:
 356          tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
 357          break;
 358
 359       default:
 360          break;
 361       }
 362    }
 363
 364    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 365               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 366               BRW_SURFACE_CUBEFACE_ENABLES |
 367               tex_format << BRW_SURFACE_FORMAT_SHIFT);
 368
 369    surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
 370
 371    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 372               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 373               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 374
 375    surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
 376               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 377               (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 378
 379    surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
 380               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 381
 382    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 383
 384    /* Emit relocation to surface contents */
 385    drm_intel_bo_emit_reloc(brw->batch.bo,
 386                            *surf_offset + 4,
 387                            mt->bo,
 388                            surf[1] - mt->bo->offset64,
 389                            I915_GEM_DOMAIN_SAMPLER, 0);
 390 }
 391
 392 /**
 393  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 394  * read from this buffer with Data Port Read instructions/messages.
 395  */
 396 void
 397 brw_create_constant_surface(struct brw_context *brw,
 398                             drm_intel_bo *bo,
 399                             uint32_t offset,
 400                             uint32_t size,
 401                             uint32_t *out_offset,
 402                             bool dword_pitch)
 403 {
 404    uint32_t stride = dword_pitch ? 4 : 16;
 405    uint32_t elements = ALIGN(size, stride) / stride;
 406
 407    brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
 408                                        BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 409                                        elements, stride, false);
 410 }
 411
 412 /**
 413  * Set up a binding table entry for use by stream output logic (transform
 414  * feedback).
 415  *
 416  * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 417  */
 418 void
 419 brw_update_sol_surface(struct brw_context *brw,
 420                        struct gl_buffer_object *buffer_obj,
 421                        uint32_t *out_offset, unsigned num_vector_components,
 422                        unsigned stride_dwords, unsigned offset_dwords)
 423 {
 424    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 425    uint32_t offset_bytes = 4 * offset_dwords;
 426    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
 427                                              offset_bytes,
 428                                              buffer_obj->Size - offset_bytes);
 429    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 430                                     out_offset);
 431    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 432    size_t size_dwords = buffer_obj->Size / 4;
 433    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 434
 435    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 436     * too big to map using a single binding table entry?
 437     */
 438    assert((size_dwords - offset_dwords) / stride_dwords
 439           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 440
 441    if (size_dwords > offset_dwords + num_vector_components) {
 442       /* There is room for at least 1 transform feedback output in the buffer.
 443        * Compute the number of additional transform feedback outputs the
 444        * buffer has room for.
 445        */
 446       buffer_size_minus_1 =
 447          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 448    } else {
 449       /* There isn't even room for a single transform feedback output in the
 450        * buffer.  We can't configure the binding table entry to prevent output
 451        * entirely; we'll have to rely on the geometry shader to detect
 452        * overflow.  But to minimize the damage in case of a bug, set up the
 453        * binding table entry to just allow a single output.
 454        */
 455       buffer_size_minus_1 = 0;
 456    }
 457    width = buffer_size_minus_1 & 0x7f;
 458    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 459    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 460
 461    switch (num_vector_components) {
 462    case 1:
 463       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 464       break;
 465    case 2:
 466       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 467       break;
 468    case 3:
 469       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 470       break;
 471    case 4:
 472       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 473       break;
 474    default:
 475       unreachable("Invalid vector size for transform feedback output");
 476    }
 477
 478    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 479       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 480       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 481       BRW_SURFACE_RC_READ_WRITE;
 482    surf[1] = bo->offset64 + offset_bytes; /* reloc */
 483    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 484               height << BRW_SURFACE_HEIGHT_SHIFT);
 485    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 486               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 487    surf[4] = 0;
 488    surf[5] = 0;
 489
 490    /* Emit relocation to surface contents. */
 491    drm_intel_bo_emit_reloc(brw->batch.bo,
 492                            *out_offset + 4,
 493                            bo, offset_bytes,
 494                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 495 }
 496
 497 /* Creates a new WM constant buffer reflecting the current fragment program's
 498  * constants, if needed by the fragment program.
 499  *
 500  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 501  * state atom.
 502  */
 503 static void
 504 brw_upload_wm_pull_constants(struct brw_context *brw)
 505 {
 506    struct brw_stage_state *stage_state = &brw->wm.base;
 507    /* BRW_NEW_FRAGMENT_PROGRAM */
 508    struct brw_fragment_program *fp =
 509       (struct brw_fragment_program *) brw->fragment_program;
 510    /* BRW_NEW_FS_PROG_DATA */
 511    struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
 512
 513    /* _NEW_PROGRAM_CONSTANTS */
 514    brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
 515                              stage_state, prog_data, true);
 516 }
 517
 518 const struct brw_tracked_state brw_wm_pull_constants = {
 519    .dirty = {
 520       .mesa = _NEW_PROGRAM_CONSTANTS,
 521       .brw = BRW_NEW_BATCH |
 522              BRW_NEW_FRAGMENT_PROGRAM |
 523              BRW_NEW_FS_PROG_DATA,
 524    },
 525    .emit = brw_upload_wm_pull_constants,
 526 };
 527
 528 /**
 529  * Creates a null renderbuffer surface.
 530  *
 531  * This is used when the shader doesn't write to any color output.  An FB
 532  * write to target 0 will still be emitted, because that's how the thread is
 533  * terminated (and computed depth is returned), so we need to have the
 534  * hardware discard the target 0 color output..
 535  */
 536 static void
 537 brw_emit_null_surface_state(struct brw_context *brw,
 538                             unsigned width,
 539                             unsigned height,
 540                             unsigned samples,
 541                             uint32_t *out_offset)
 542 {
 543    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 544     * Notes):
 545     *
 546     *     A null surface will be used in instances where an actual surface is
 547     *     not bound. When a write message is generated to a null surface, no
 548     *     actual surface is written to. When a read message (including any
 549     *     sampling engine message) is generated to a null surface, the result
 550     *     is all zeros. Note that a null surface type is allowed to be used
 551     *     with all messages, even if it is not specificially indicated as
 552     *     supported. All of the remaining fields in surface state are ignored
 553     *     for null surfaces, with the following exceptions:
 554     *
 555     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 556     *       depth buffer’s corresponding state for all render target surfaces,
 557     *       including null.
 558     *
 559     *     - Surface Format must be R8G8B8A8_UNORM.
 560     */
 561    unsigned surface_type = BRW_SURFACE_NULL;
 562    drm_intel_bo *bo = NULL;
 563    unsigned pitch_minus_1 = 0;
 564    uint32_t multisampling_state = 0;
 565    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 566                                     out_offset);
 567
 568    if (samples > 1) {
 569       /* On Gen6, null render targets seem to cause GPU hangs when
 570        * multisampling.  So work around this problem by rendering into dummy
 571        * color buffer.
 572        *
 573        * To decrease the amount of memory needed by the workaround buffer, we
 574        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 575        * the amount of memory needed for the workaround buffer is
 576        * (width_in_tiles + height_in_tiles - 1) tiles.
 577        *
 578        * Note that since the workaround buffer will be interpreted by the
 579        * hardware as an interleaved multisampled buffer, we need to compute
 580        * width_in_tiles and height_in_tiles by dividing the width and height
 581        * by 16 rather than the normal Y-tile size of 32.
 582        */
 583       unsigned width_in_tiles = ALIGN(width, 16) / 16;
 584       unsigned height_in_tiles = ALIGN(height, 16) / 16;
 585       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 586       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 587                          size_needed);
 588       bo = brw->wm.multisampled_null_render_target_bo;
 589       surface_type = BRW_SURFACE_2D;
 590       pitch_minus_1 = 127;
 591       multisampling_state = brw_get_surface_num_multisamples(samples);
 592    }
 593
 594    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 595               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 596    if (brw->gen < 6) {
 597       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 598                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 599                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 600                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 601    }
 602    surf[1] = bo ? bo->offset64 : 0;
 603    surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 604               (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 605
 606    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 607     * Notes):
 608     *
 609     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 610     */
 611    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 612               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 613    surf[4] = multisampling_state;
 614    surf[5] = 0;
 615
 616    if (bo) {
 617       drm_intel_bo_emit_reloc(brw->batch.bo,
 618                               *out_offset + 4,
 619                               bo, 0,
 620                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 621    }
 622 }
 623
 624 /**
 625  * Sets up a surface state structure to point at the given region.
 626  * While it is only used for the front/back buffer currently, it should be
 627  * usable for further buffers when doing ARB_draw_buffer support.
 628  */
 629 static uint32_t
 630 brw_update_renderbuffer_surface(struct brw_context *brw,
 631                                 struct gl_renderbuffer *rb,
 632                                 bool layered, unsigned unit,
 633                                 uint32_t surf_index)
 634 {
 635    struct gl_context *ctx = &brw->ctx;
 636    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 637    struct intel_mipmap_tree *mt = irb->mt;
 638    uint32_t *surf;
 639    uint32_t tile_x, tile_y;
 640    uint32_t format = 0;
 641    uint32_t offset;
 642    /* _NEW_BUFFERS */
 643    mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 644    /* BRW_NEW_FS_PROG_DATA */
 645
 646    assert(!layered);
 647
 648    if (rb->TexImage && !brw->has_surface_tile_offset) {
 649       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 650
 651       if (tile_x != 0 || tile_y != 0) {
 652          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 653           * destination in a miptree unless you actually setup your renderbuffer
 654           * as a miptree and used the fragile lod/array_index/etc. controls to
 655           * select the image.  So, instead, we just make a new single-level
 656           * miptree and render into that.
 657           */
 658          intel_renderbuffer_move_to_temp(brw, irb, false);
 659          mt = irb->mt;
 660       }
 661    }
 662
 663    intel_miptree_used_for_rendering(irb->mt);
 664
 665    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
 666
 667    format = brw->render_target_format[rb_format];
 668    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 669       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 670                     __func__, _mesa_get_format_name(rb_format));
 671    }
 672
 673    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 674               format << BRW_SURFACE_FORMAT_SHIFT);
 675
 676    /* reloc */
 677    assert(mt->offset % mt->cpp == 0);
 678    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 679               mt->bo->offset64 + mt->offset);
 680
 681    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 682               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 683
 684    surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
 685               (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 686
 687    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 688
 689    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 690    /* Note that the low bits of these fields are missing, so
 691     * there's the possibility of getting in trouble.
 692     */
 693    assert(tile_x % 4 == 0);
 694    assert(tile_y % 2 == 0);
 695    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 696               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 697               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 698
 699    if (brw->gen < 6) {
 700       /* _NEW_COLOR */
 701       if (!ctx->Color.ColorLogicOpEnabled &&
 702           (ctx->Color.BlendEnabled & (1 << unit)))
 703          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 704
 705       if (!ctx->Color.ColorMask[unit][0])
 706          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 707       if (!ctx->Color.ColorMask[unit][1])
 708          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 709       if (!ctx->Color.ColorMask[unit][2])
 710          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 711
 712       /* As mentioned above, disable writes to the alpha component when the
 713        * renderbuffer is XRGB.
 714        */
 715       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 716           !ctx->Color.ColorMask[unit][3]) {
 717          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 718       }
 719    }
 720
 721    drm_intel_bo_emit_reloc(brw->batch.bo,
 722                            offset + 4,
 723                            mt->bo,
 724                            surf[1] - mt->bo->offset64,
 725                            I915_GEM_DOMAIN_RENDER,
 726                            I915_GEM_DOMAIN_RENDER);
 727
 728    return offset;
 729 }
 730
 731 /**
 732  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 733  */
 734 void
 735 brw_update_renderbuffer_surfaces(struct brw_context *brw,
 736                                  const struct gl_framebuffer *fb,
 737                                  uint32_t render_target_start,
 738                                  uint32_t *surf_offset)
 739 {
 740    GLuint i;
 741
 742    /* Update surfaces for drawing buffers */
 743    if (fb->_NumColorDrawBuffers >= 1) {
 744       for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
 745          const uint32_t surf_index = render_target_start + i;
 746
 747          if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
 748             surf_offset[surf_index] =
 749                brw->vtbl.update_renderbuffer_surface(
 750                   brw, fb->_ColorDrawBuffers[i],
 751                   fb->MaxNumLayers > 0, i, surf_index);
 752          } else {
 753             brw->vtbl.emit_null_surface_state(
 754                brw, fb->Width, fb->Height, fb->Visual.samples,
 755                &surf_offset[surf_index]);
 756          }
 757       }
 758    } else {
 759       const uint32_t surf_index = render_target_start;
 760       brw->vtbl.emit_null_surface_state(
 761          brw, fb->Width, fb->Height, fb->Visual.samples,
 762          &surf_offset[surf_index]);
 763    }
 764 }
 765
 766 static void
 767 update_renderbuffer_surfaces(struct brw_context *brw)
 768 {
 769    const struct gl_context *ctx = &brw->ctx;
 770
 771    /* _NEW_BUFFERS | _NEW_COLOR */
 772    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 773    brw_update_renderbuffer_surfaces(
 774       brw, fb,
 775       brw->wm.prog_data->binding_table.render_target_start,
 776       brw->wm.base.surf_offset);
 777    brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
 778 }
 779
 780 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 781    .dirty = {
 782       .mesa = _NEW_BUFFERS |
 783               _NEW_COLOR,
 784       .brw = BRW_NEW_BATCH |
 785              BRW_NEW_FS_PROG_DATA,
 786    },
 787    .emit = update_renderbuffer_surfaces,
 788 };
 789
 790 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 791    .dirty = {
 792       .mesa = _NEW_BUFFERS,
 793       .brw = BRW_NEW_BATCH,
 794    },
 795    .emit = update_renderbuffer_surfaces,
 796 };
 797
 798
 799 static void
 800 update_stage_texture_surfaces(struct brw_context *brw,
 801                               const struct gl_program *prog,
 802                               struct brw_stage_state *stage_state,
 803                               bool for_gather)
 804 {
 805    if (!prog)
 806       return;
 807
 808    struct gl_context *ctx = &brw->ctx;
 809
 810    uint32_t *surf_offset = stage_state->surf_offset;
 811
 812    /* BRW_NEW_*_PROG_DATA */
 813    if (for_gather)
 814       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
 815    else
 816       surf_offset += stage_state->prog_data->binding_table.texture_start;
 817
 818    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 819    for (unsigned s = 0; s < num_samplers; s++) {
 820       surf_offset[s] = 0;
 821
 822       if (prog->SamplersUsed & (1 << s)) {
 823          const unsigned unit = prog->SamplerUnits[s];
 824
 825          /* _NEW_TEXTURE */
 826          if (ctx->Texture.Unit[unit]._Current) {
 827             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 828          }
 829       }
 830    }
 831 }
 832
 833
 834 /**
 835  * Construct SURFACE_STATE objects for enabled textures.
 836  */
 837 static void
 838 brw_update_texture_surfaces(struct brw_context *brw)
 839 {
 840    /* BRW_NEW_VERTEX_PROGRAM */
 841    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 842
 843    /* BRW_NEW_GEOMETRY_PROGRAM */
 844    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 845
 846    /* BRW_NEW_FRAGMENT_PROGRAM */
 847    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 848
 849    /* _NEW_TEXTURE */
 850    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
 851    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
 852    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
 853
 854    /* emit alternate set of surface state for gather. this
 855     * allows the surface format to be overriden for only the
 856     * gather4 messages. */
 857    if (brw->gen < 8) {
 858       if (vs && vs->UsesGather)
 859          update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
 860       if (gs && gs->UsesGather)
 861          update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
 862       if (fs && fs->UsesGather)
 863          update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
 864    }
 865
 866    brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
 867 }
 868
 869 const struct brw_tracked_state brw_texture_surfaces = {
 870    .dirty = {
 871       .mesa = _NEW_TEXTURE,
 872       .brw = BRW_NEW_BATCH |
 873              BRW_NEW_FRAGMENT_PROGRAM |
 874              BRW_NEW_FS_PROG_DATA |
 875              BRW_NEW_GEOMETRY_PROGRAM |
 876              BRW_NEW_GS_PROG_DATA |
 877              BRW_NEW_TEXTURE_BUFFER |
 878              BRW_NEW_VERTEX_PROGRAM |
 879              BRW_NEW_VS_PROG_DATA,
 880    },
 881    .emit = brw_update_texture_surfaces,
 882 };
 883
 884 void
 885 brw_upload_ubo_surfaces(struct brw_context *brw,
 886                         struct gl_shader *shader,
 887                         struct brw_stage_state *stage_state,
 888                         struct brw_stage_prog_data *prog_data,
 889                         bool dword_pitch)
 890 {
 891    struct gl_context *ctx = &brw->ctx;
 892
 893    if (!shader)
 894       return;
 895
 896    uint32_t *surf_offsets =
 897       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 898
 899    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 900       struct gl_uniform_buffer_binding *binding;
 901       struct intel_buffer_object *intel_bo;
 902
 903       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 904       intel_bo = intel_buffer_object(binding->BufferObject);
 905       drm_intel_bo *bo =
 906          intel_bufferobj_buffer(brw, intel_bo,
 907                                 binding->Offset,
 908                                 binding->BufferObject->Size - binding->Offset);
 909
 910       /* Because behavior for referencing outside of the binding's size in the
 911        * glBindBufferRange case is undefined, we can just bind the whole buffer
 912        * glBindBufferBase wants and be a correct implementation.
 913        */
 914       brw_create_constant_surface(brw, bo, binding->Offset,
 915                                   bo->size - binding->Offset,
 916                                   &surf_offsets[i],
 917                                   dword_pitch);
 918    }
 919
 920    if (shader->NumUniformBlocks)
 921       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
 922 }
 923
 924 static void
 925 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 926 {
 927    struct gl_context *ctx = &brw->ctx;
 928    /* _NEW_PROGRAM */
 929    struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
 930
 931    if (!prog)
 932       return;
 933
 934    /* BRW_NEW_FS_PROG_DATA */
 935    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 936                            &brw->wm.base, &brw->wm.prog_data->base, true);
 937 }
 938
 939 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 940    .dirty = {
 941       .mesa = _NEW_PROGRAM,
 942       .brw = BRW_NEW_BATCH |
 943              BRW_NEW_FS_PROG_DATA |
 944              BRW_NEW_UNIFORM_BUFFER,
 945    },
 946    .emit = brw_upload_wm_ubo_surfaces,
 947 };
 948
 949 void
 950 brw_upload_abo_surfaces(struct brw_context *brw,
 951                         struct gl_shader_program *prog,
 952                         struct brw_stage_state *stage_state,
 953                         struct brw_stage_prog_data *prog_data)
 954 {
 955    struct gl_context *ctx = &brw->ctx;
 956    uint32_t *surf_offsets =
 957       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 958
 959    for (int i = 0; i < prog->NumAtomicBuffers; i++) {
 960       struct gl_atomic_buffer_binding *binding =
 961          &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
 962       struct intel_buffer_object *intel_bo =
 963          intel_buffer_object(binding->BufferObject);
 964       drm_intel_bo *bo = intel_bufferobj_buffer(
 965          brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
 966
 967       brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
 968                                           binding->Offset, BRW_SURFACEFORMAT_RAW,
 969                                           bo->size - binding->Offset, 1, true);
 970    }
 971
 972    if (prog->NumAtomicBuffers)
 973       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
 974 }
 975
 976 static void
 977 brw_upload_wm_abo_surfaces(struct brw_context *brw)
 978 {
 979    struct gl_context *ctx = &brw->ctx;
 980    /* _NEW_PROGRAM */
 981    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 982
 983    if (prog) {
 984       /* BRW_NEW_FS_PROG_DATA */
 985       brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
 986                               &brw->wm.prog_data->base);
 987    }
 988 }
 989
 990 const struct brw_tracked_state brw_wm_abo_surfaces = {
 991    .dirty = {
 992       .mesa = _NEW_PROGRAM,
 993       .brw = BRW_NEW_ATOMIC_BUFFER |
 994              BRW_NEW_BATCH |
 995              BRW_NEW_FS_PROG_DATA,
 996    },
 997    .emit = brw_upload_wm_abo_surfaces,
 998 };
 999
1000 static void
1001 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1002 {
1003    struct gl_context *ctx = &brw->ctx;
1004    /* _NEW_PROGRAM */
1005    struct gl_shader_program *prog =
1006       ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1007
1008    if (prog) {
1009       /* BRW_NEW_CS_PROG_DATA */
1010       brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
1011                               &brw->cs.prog_data->base);
1012    }
1013 }
1014
1015 const struct brw_tracked_state brw_cs_abo_surfaces = {
1016    .dirty = {
1017       .mesa = _NEW_PROGRAM,
1018       .brw = BRW_NEW_ATOMIC_BUFFER |
1019              BRW_NEW_BATCH |
1020              BRW_NEW_CS_PROG_DATA,
1021    },
1022    .emit = brw_upload_cs_abo_surfaces,
1023 };
1024
1025 void
1026 gen4_init_vtable_surface_functions(struct brw_context *brw)
1027 {
1028    brw->vtbl.update_texture_surface = brw_update_texture_surface;
1029    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1030    brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1031    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1032 }