/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"
#include "main/framebuffer.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
static uint32_t
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}
uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}
void
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
                      bool is_render_target,
                      unsigned *width, unsigned *height,
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
{
   static const unsigned halign_stencil = 8;

   /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
    * there are half as many rows.
    * In addition, mip-levels are accessed manually by the program and
    * therefore the surface is setup to cover all the mip-levels for one slice.
    * (Hardware is still used to access individual slices).
    */
   *tiling = I915_TILING_Y;
   *pitch = mt->pitch * 2;
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
   *height = (mt->total_height / mt->physical_depth0) / 2;

   if (is_render_target) {
      *format = BRW_SURFACEFORMAT_R8_UINT;
   }
}
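/* Note on the math in brw_configure_w_tiled() above: a W tile is 64x64
 * bytes while a Y tile is 128 bytes wide by 32 rows, so presenting a
 * W-tiled stencil buffer through a Y-tiled surface means advertising a
 * pitch twice the W-tiled pitch and half as many rows.  As a rough,
 * illustrative example (numbers not taken from the original source): a
 * stencil miptree with pitch 128, total_height 256 and physical_depth0 2
 * would be exposed as pitch 256 and height (256 / 2) / 2 = 64.
 */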
/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
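/* How the return value above composes: MAKE_SWIZZLE4() packs four 3-bit
 * channel selectors, and GET_SWZ(t->_Swizzle, i) picks the application's
 * selector for output channel i; indexing the format-derived swizzles[]
 * table with it chains the two.  Hypothetical example: an application
 * swizzle of (ALPHA, ALPHA, ALPHA, ALPHA) on a GL_ALPHA texture yields
 * swizzles[SWIZZLE_W] == SWIZZLE_W for every channel, so the sampler
 * fetches .wwww from the underlying RGBA surface.
 */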
static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache. It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}
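/* The size encoding above follows directly from the shifts and masks:
 * buffer_size is split across the Width (bits 6:0), Height (bits 19:7) and
 * Depth (bits 26:20) fields of the SURFACE_STATE, so a buffer surface can
 * describe at most 2^27 elements, each 'pitch' bytes apart.
 */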
static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format,
                                       size / texel_size,
                                       texel_size,
                                       false /* rw */);
}
static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_TEXTURE_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   uint32_t tex_format = translate_tex_format(brw, mt->format,
                                               sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value. For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}
/**
 * Create the constant buffer surface. Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;

   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       elements, stride, false);
}
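/* Design note (an interpretation, not from the original comments): with
 * dword_pitch the constant buffer is described as a stream of 4-byte
 * elements, otherwise as 16-byte vec4 elements; 'elements' rounds the byte
 * size up to whole strides.  For example, size = 100 bytes without
 * dword_pitch gives stride 16 and ALIGN(100, 16) / 16 = 7 elements.
 */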
/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
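/* Worked example for the sizing above (illustrative numbers only): a
 * 1024-byte buffer gives size_dwords = 256; with offset_dwords = 0 and a
 * single vec4 output per vertex (num_vector_components = 4, stride_dwords
 * = 4), buffer_size_minus_1 = (256 - 0 - 4) / 4 = 63, i.e. room for 64
 * outputs.  width/height/depth then carry bits 6:0, 19:7 and 26:20 of that
 * count into the surface state.
 */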
/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;

   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
                             stage_state, prog_data, true);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};
/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output. An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
brw_emit_null_surface_state(struct brw_context *brw,
                            unsigned width,
                            unsigned height,
                            unsigned samples,
                            uint32_t *out_offset)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specificially indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer’s corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);

   if (samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling. So work around this problem by rendering into dummy
       * color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile). This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state = brw_get_surface_num_multisamples(samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              *out_offset + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}
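/* Rough cost of the multisample workaround buffer above, with made-up
 * numbers: for a 2048x1536 multisampled framebuffer, width_in_tiles = 128
 * and height_in_tiles = 96, so size_needed = (128 + 96 - 1) * 4096 =
 * 913408 bytes, far smaller than a full-size dummy color buffer would be.
 */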
/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered, unsigned unit,
                                uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   assert(mt->offset % mt->cpp == 0);
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64 + mt->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* As mentioned above, disable writes to the alpha component when the
       * renderbuffer is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);

   return offset;
}
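/* The X/Y offset fields written into surf[5] above only have 4-pixel and
 * 2-row granularity in SURFACE_STATE, which is why the low bits are
 * asserted to be zero: a tile_x of, say, 6 pixels simply cannot be
 * expressed, hence the "low bits missing" caveat in the comment.
 */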
/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
void
brw_update_renderbuffer_surfaces(struct brw_context *brw,
                                 const struct gl_framebuffer *fb,
                                 uint32_t render_target_start,
                                 uint32_t *surf_offset)
{
   GLuint i;
   const unsigned int w = _mesa_geometric_width(fb);
   const unsigned int h = _mesa_geometric_height(fb);
   const unsigned int s = _mesa_geometric_samples(fb);

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const uint32_t surf_index = render_target_start + i;

         if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
            surf_offset[surf_index] =
               brw->vtbl.update_renderbuffer_surface(
                  brw, fb->_ColorDrawBuffers[i],
                  _mesa_geometric_layers(fb) > 0, i, surf_index);
         } else {
            brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                              &surf_offset[surf_index]);
         }
      }
   } else {
      const uint32_t surf_index = render_target_start;
      brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                        &surf_offset[surf_index]);
   }
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
   brw_update_renderbuffer_surfaces(
      brw, fb,
      brw->wm.prog_data->binding_table.render_target_start,
      brw->wm.base.surf_offset);
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;

   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
         }
      }
   }
}
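/* Binding-table layout implied by the offsets above: for a stage using
 * sampler s, the regular surface lands at slot texture_start + s and, when
 * for_gather is set, a second copy of the state lands at
 * gather_texture_start + s.  (Illustrative description of the indexing,
 * not a quote from the original comments.)
 */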
/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overridden for only the
    * gather4 messages. */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};
void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data,
                        bool dword_pitch)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_bo,
                                binding->Offset,
                                binding->BufferObject->Size - binding->Offset);

      /* Because behavior for referencing outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * glBindBufferBase wants and be a correct implementation.
       */
      brw_create_constant_surface(brw, bo, binding->Offset,
                                  bo->size - binding->Offset,
                                  &surf_offsets[i],
                                  dword_pitch);
   }

   if (shader->NumUniformBlocks)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base, true);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};
static void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   for (int i = 0; i < prog->NumAtomicBuffers; i++) {
      struct gl_atomic_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
      struct intel_buffer_object *intel_bo =
         intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(
         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);

      brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                          binding->Offset, BRW_SURFACEFORMAT_RAW,
                                          bo->size - binding->Offset, 1, true);
   }

   if (prog->NumAtomicBuffers)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
                              &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};
static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
                              &brw->cs.prog_data->base);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}