src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

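/* Note (added commentary, not from the original source): the pre-Gen7
 * SURFACE_STATE layout emitted by this file only distinguishes 1x and 4x in
 * its "Number of Multisamples" field, and 4x appears to be the only
 * multisampled mode supported by the hardware that uses this path, so any
 * num_samples > 1 is encoded as 4x here.
 */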
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

void
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
                      bool is_render_target,
                      unsigned *width, unsigned *height,
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
{
   static const unsigned halign_stencil = 8;

   /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
    * there are half as many rows.
    * In addition, mip-levels are accessed manually by the program and
    * therefore the surface is set up to cover all the mip-levels for one
    * slice.  (Hardware is still used to access individual slices.)
    */
   *tiling = I915_TILING_Y;
   *pitch = mt->pitch * 2;
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
   *height = (mt->total_height / mt->physical_depth0) / 2;
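   /* Illustrative example (added commentary): a single-slice stencil miptree
    * with total_width = 68, total_height = 64 and pitch = 128 bytes is
    * advertised to the hardware as a Y-tiled surface with pitch = 256,
    * width = ALIGN(68, 8) * 2 = 144 and height = (64 / 1) / 2 = 32.
    */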

   if (is_render_target) {
      *format = BRW_SURFACEFORMAT_R8_UINT;
   }
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0.  Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0.  This allows the implementation
    * to use an RGBA texture for any of these formats without leaking
    * any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

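   /* Added commentary: the application's swizzle (t->_Swizzle) selects which
    * entry of the format swizzle computed above feeds each channel.  Purely
    * illustrative example: a depth texture with DepthMode GL_LUMINANCE and a
    * texture swizzle of (GREEN, GREEN, GREEN, ONE) returns
    * MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE), since
    * swizzles[SWIZZLE_Y] is SWIZZLE_X and swizzles[SWIZZLE_ONE] is
    * SWIZZLE_ONE here.
    */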
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
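   /* Added commentary: there is no single "buffer size" field in this
    * SURFACE_STATE layout; the size passed in is split across the Width
    * (bits 6:0), Height (bits 19:7) and Depth (bits 26:20) fields below.
    * Illustrative example: buffer_size = 1000 ends up as width 104,
    * height 7, depth 0.
    */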
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format,
                                       size / texel_size,
                                       texel_size,
                                       false /* rw */);
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_TEXTURE_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   uint32_t tex_format = translate_tex_format(brw, mt->format,
                                              sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value.  For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will
 * be read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;
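   /* Illustrative note (added commentary): with dword_pitch == false the
    * buffer is presented to the hardware as an array of 16-byte (vec4)
    * elements, so a 256-byte constant buffer yields 16 elements; with
    * dword_pitch == true the same buffer is treated as 64 scalar dwords.
    */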

   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       elements, stride, false);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
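   /* Added commentary: like the buffer surfaces above, the entry count is
    * packed into the Width/Height/Depth fields: bits 6:0 in Width,
    * bits 19:7 in Height and bits 26:20 in Depth, for 27 usable bits total.
    */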
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;

   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
                             stage_state, prog_data, true);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
brw_emit_null_surface_state(struct brw_context *brw,
                            unsigned width,
                            unsigned height,
                            unsigned samples,
                            uint32_t *out_offset)
{
   /* From the Sandybridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound.  When a write message is generated to a null surface, no
    *     actual surface is written to.  When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros.  Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported.  All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);

   if (samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
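      /* Illustrative example (added commentary): for a 1920x1080
       * multisampled framebuffer this works out to width_in_tiles = 120 and
       * height_in_tiles = 68, i.e. a (120 + 68 - 1) * 4096 byte = ~748 KiB
       * scratch buffer instead of a full-size dummy render target.
       */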
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state = brw_get_surface_num_multisamples(samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandybridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              *out_offset + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64 + mt->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Also disable writes to the alpha component when the renderbuffer
       * is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.base.surf_offset[surf_index] + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw,
               ctx->DrawBuffer->_ColorDrawBuffers[i],
               ctx->DrawBuffer->MaxNumLayers > 0, i);
         } else {
            const uint32_t surf_index =
               brw->wm.prog_data->binding_table.render_target_start + i;

            brw->vtbl.emit_null_surface_state(
               brw, fb->Width, fb->Height, fb->Visual.samples,
               &brw->wm.base.surf_offset[surf_index]);
         }
      }
   } else {
      const uint32_t surf_index =
         brw->wm.prog_data->binding_table.render_target_start;

      brw->vtbl.emit_null_surface_state(
         brw, fb->Width, fb->Height, fb->Visual.samples,
         &brw->wm.base.surf_offset[surf_index]);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
   },
   .emit = brw_update_renderbuffer_surfaces,
};

static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;

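   /* Added commentary: _mesa_fls() returns one past the index of the highest
    * set bit, so the loop below walks every sampler slot up to and including
    * the last one the program uses, zeroing the entries for unused slots.
    */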
   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s,
                                             for_gather);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);

   /* Emit an alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data,
                        bool dword_pitch)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_bo,
                                binding->Offset,
                                binding->BufferObject->Size - binding->Offset);

      /* Because behavior for references outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole
       * buffer (as glBindBufferBase would) and still be a correct
       * implementation.
       */
      brw_create_constant_surface(brw, bo, binding->Offset,
                                  bo->size - binding->Offset,
                                  &surf_offsets[i],
                                  dword_pitch);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base, true);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   for (int i = 0; i < prog->NumAtomicBuffers; i++) {
      struct gl_atomic_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
      struct intel_buffer_object *intel_bo =
         intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(
         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);

      brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
                                   bo->size - binding->Offset,
                                   &surf_offsets[i], true);
   }

   if (prog->NumAtomicBuffers)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
                              &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}