i965: Override swizzles for integer luminance formats.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 unreachable("not reached");
77 }
78 }
79
80 uint32_t
81 brw_get_surface_tiling_bits(uint32_t tiling)
82 {
83 switch (tiling) {
84 case I915_TILING_X:
85 return BRW_SURFACE_TILED;
86 case I915_TILING_Y:
87 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
88 default:
89 return 0;
90 }
91 }
92
93
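/* The gen4-6 SURFACE_STATE "Number of Multisamples" field only distinguishes
 * single-sampled from 4x surfaces, so any multisampled miptree is reported
 * as 4x here.
 */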
94 uint32_t
95 brw_get_surface_num_multisamples(unsigned num_samples)
96 {
97 if (num_samples > 1)
98 return BRW_SURFACE_MULTISAMPLECOUNT_4;
99 else
100 return BRW_SURFACE_MULTISAMPLECOUNT_1;
101 }
102
103 void
104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
105 bool is_render_target,
106 unsigned *width, unsigned *height,
107 unsigned *pitch, uint32_t *tiling, unsigned *format)
108 {
109 static const unsigned halign_stencil = 8;
110
111 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
112 * there are half as many rows.
113 * In addition, mip-levels are accessed manually by the program and
114 * therefore the surface is set up to cover all the mip-levels for one slice.
115 * (Hardware is still used to access individual slices.)
116 */
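/* For example (hypothetical numbers): a stencil miptree with pitch 64,
 * total_width 64, total_height 128 and physical_depth0 1 would be exposed
 * as a Y-tiled surface with pitch 128, width ALIGN(64, 8) * 2 = 128 and
 * height (128 / 1) / 2 = 64.
 */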
117 *tiling = I915_TILING_Y;
118 *pitch = mt->pitch * 2;
119 *width = ALIGN(mt->total_width, halign_stencil) * 2;
120 *height = (mt->total_height / mt->physical_depth0) / 2;
121
122 if (is_render_target) {
123 *format = BRW_SURFACEFORMAT_R8_UINT;
124 }
125 }
126
127
128 /**
129 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
130 * swizzling.
131 */
132 int
133 brw_get_texture_swizzle(const struct gl_context *ctx,
134 const struct gl_texture_object *t)
135 {
136 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
137
138 int swizzles[SWIZZLE_NIL + 1] = {
139 SWIZZLE_X,
140 SWIZZLE_Y,
141 SWIZZLE_Z,
142 SWIZZLE_W,
143 SWIZZLE_ZERO,
144 SWIZZLE_ONE,
145 SWIZZLE_NIL
146 };
147
148 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
149 img->_BaseFormat == GL_DEPTH_STENCIL) {
150 GLenum depth_mode = t->DepthMode;
151
152 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
153 * with depth component data specified with a sized internal format.
154 * Otherwise, it's left at the old default, GL_LUMINANCE.
155 */
156 if (_mesa_is_gles3(ctx) &&
157 img->InternalFormat != GL_DEPTH_COMPONENT &&
158 img->InternalFormat != GL_DEPTH_STENCIL) {
159 depth_mode = GL_RED;
160 }
161
162 switch (depth_mode) {
163 case GL_ALPHA:
164 swizzles[0] = SWIZZLE_ZERO;
165 swizzles[1] = SWIZZLE_ZERO;
166 swizzles[2] = SWIZZLE_ZERO;
167 swizzles[3] = SWIZZLE_X;
168 break;
169 case GL_LUMINANCE:
170 swizzles[0] = SWIZZLE_X;
171 swizzles[1] = SWIZZLE_X;
172 swizzles[2] = SWIZZLE_X;
173 swizzles[3] = SWIZZLE_ONE;
174 break;
175 case GL_INTENSITY:
176 swizzles[0] = SWIZZLE_X;
177 swizzles[1] = SWIZZLE_X;
178 swizzles[2] = SWIZZLE_X;
179 swizzles[3] = SWIZZLE_X;
180 break;
181 case GL_RED:
182 swizzles[0] = SWIZZLE_X;
183 swizzles[1] = SWIZZLE_ZERO;
184 swizzles[2] = SWIZZLE_ZERO;
185 swizzles[3] = SWIZZLE_ONE;
186 break;
187 }
188 }
189
190 /* If the texture's format is alpha-only, force R, G, and B to
191 * 0.0. Similarly, if the texture's format has no alpha channel,
192 * force the alpha value read to 1.0. This allows the
193 * implementation to use an RGBA texture for any of these formats
194 * without leaking any unexpected values.
195 */
196 switch (img->_BaseFormat) {
197 case GL_ALPHA:
198 swizzles[0] = SWIZZLE_ZERO;
199 swizzles[1] = SWIZZLE_ZERO;
200 swizzles[2] = SWIZZLE_ZERO;
201 break;
202 case GL_LUMINANCE:
203 if (t->_IsIntegerFormat) {
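/* The hardware has no SINT/UINT luminance surface formats, so integer
 * luminance textures are sampled through a single-channel (red) format;
 * replicate red into RGB here and force alpha to one.
 */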
204 swizzles[0] = SWIZZLE_X;
205 swizzles[1] = SWIZZLE_X;
206 swizzles[2] = SWIZZLE_X;
207 swizzles[3] = SWIZZLE_ONE;
208 }
209 break;
210 case GL_RED:
211 case GL_RG:
212 case GL_RGB:
213 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
214 swizzles[3] = SWIZZLE_ONE;
215 break;
216 }
217
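/* Compose the application's texture swizzle with the format-based swizzle
 * computed above. For example (hypothetical): a depth texture with
 * DEPTH_TEXTURE_MODE GL_LUMINANCE yields (X, X, X, ONE) above; if the
 * application then sets a texture swizzle of (A, R, G, B), the final
 * swizzle is (ONE, X, X, X).
 */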
218 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
219 swizzles[GET_SWZ(t->_Swizzle, 1)],
220 swizzles[GET_SWZ(t->_Swizzle, 2)],
221 swizzles[GET_SWZ(t->_Swizzle, 3)]);
222 }
223
224 static void
225 gen4_emit_buffer_surface_state(struct brw_context *brw,
226 uint32_t *out_offset,
227 drm_intel_bo *bo,
228 unsigned buffer_offset,
229 unsigned surface_format,
230 unsigned buffer_size,
231 unsigned pitch,
232 bool rw)
233 {
234 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
235 6 * 4, 32, out_offset);
236 memset(surf, 0, 6 * 4);
237
238 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
239 surface_format << BRW_SURFACE_FORMAT_SHIFT |
240 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
241 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
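/* The element count is split across the Width (bits 6:0), Height
 * (bits 19:7) and Depth (bits 26:20) fields of the buffer SURFACE_STATE.
 */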
242 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
243 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
244 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
245 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
246
247 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
248 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
249 * physical cache. It is mapped in hardware to the sampler cache."
250 */
251 if (bo) {
252 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
253 bo, buffer_offset,
254 I915_GEM_DOMAIN_SAMPLER,
255 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
256 }
257 }
258
259 void
260 brw_update_buffer_texture_surface(struct gl_context *ctx,
261 unsigned unit,
262 uint32_t *surf_offset)
263 {
264 struct brw_context *brw = brw_context(ctx);
265 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
266 struct intel_buffer_object *intel_obj =
267 intel_buffer_object(tObj->BufferObject);
268 uint32_t size = tObj->BufferSize;
269 drm_intel_bo *bo = NULL;
270 mesa_format format = tObj->_BufferObjectFormat;
271 uint32_t brw_format = brw_format_for_mesa_format(format);
272 int texel_size = _mesa_get_format_bytes(format);
273
274 if (intel_obj) {
275 size = MIN2(size, intel_obj->Base.Size);
276 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
277 }
278
279 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
280 _mesa_problem(NULL, "bad format %s for texture buffer\n",
281 _mesa_get_format_name(format));
282 }
283
284 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
285 tObj->BufferOffset,
286 brw_format,
287 size / texel_size,
288 texel_size,
289 false /* rw */);
290 }
291
292 static void
293 brw_update_texture_surface(struct gl_context *ctx,
294 unsigned unit,
295 uint32_t *surf_offset,
296 bool for_gather)
297 {
298 struct brw_context *brw = brw_context(ctx);
299 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
300 struct intel_texture_object *intelObj = intel_texture_object(tObj);
301 struct intel_mipmap_tree *mt = intelObj->mt;
302 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
303 uint32_t *surf;
304
305 /* BRW_NEW_TEXTURE_BUFFER */
306 if (tObj->Target == GL_TEXTURE_BUFFER) {
307 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
308 return;
309 }
310
311 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
312 6 * 4, 32, surf_offset);
313
314 uint32_t tex_format = translate_tex_format(brw, mt->format,
315 sampler->sRGBDecode);
316
317 if (for_gather) {
318 /* Sandybridge's gather4 message is broken for integer formats.
319 * To work around this, we pretend the surface is UNORM for
320 * 8 or 16-bit formats, and emit shader instructions to recover
321 * the real INT/UINT value. For 32-bit formats, we pretend
322 * the surface is FLOAT, and simply reinterpret the resulting
323 * bits.
324 */
325 switch (tex_format) {
326 case BRW_SURFACEFORMAT_R8_SINT:
327 case BRW_SURFACEFORMAT_R8_UINT:
328 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
329 break;
330
331 case BRW_SURFACEFORMAT_R16_SINT:
332 case BRW_SURFACEFORMAT_R16_UINT:
333 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
334 break;
335
336 case BRW_SURFACEFORMAT_R32_SINT:
337 case BRW_SURFACEFORMAT_R32_UINT:
338 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
339 break;
340
341 default:
342 break;
343 }
344 }
345
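/* The six SURFACE_STATE dwords below are: [0] type/format, [1] base
 * address, [2] width/height/LOD, [3] pitch/depth/tiling, [4] multisample
 * count/min LOD, and [5] X/Y offset plus vertical alignment.
 */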
346 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
347 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
348 BRW_SURFACE_CUBEFACE_ENABLES |
349 tex_format << BRW_SURFACE_FORMAT_SHIFT);
350
351 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
352
353 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
354 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
355 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
356
357 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
358 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
359 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
360
361 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
362 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
363
364 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
365
366 /* Emit relocation to surface contents */
367 drm_intel_bo_emit_reloc(brw->batch.bo,
368 *surf_offset + 4,
369 mt->bo,
370 surf[1] - mt->bo->offset64,
371 I915_GEM_DOMAIN_SAMPLER, 0);
372 }
373
374 /**
375 * Create the constant buffer surface. Vertex/fragment shader constants will be
376 * read from this buffer with Data Port Read instructions/messages.
377 */
378 void
379 brw_create_constant_surface(struct brw_context *brw,
380 drm_intel_bo *bo,
381 uint32_t offset,
382 uint32_t size,
383 uint32_t *out_offset,
384 bool dword_pitch)
385 {
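/* dword_pitch selects a 4-byte element stride instead of the usual 16-byte
 * vec4 stride; the surface format stays R32G32B32A32_FLOAT either way, only
 * the addressing granularity changes.
 */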
386 uint32_t stride = dword_pitch ? 4 : 16;
387 uint32_t elements = ALIGN(size, stride) / stride;
388
389 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
390 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
391 elements, stride, false);
392 }
393
394 /**
395 * Set up a binding table entry for use by stream output logic (transform
396 * feedback).
397 *
398 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
399 */
400 void
401 brw_update_sol_surface(struct brw_context *brw,
402 struct gl_buffer_object *buffer_obj,
403 uint32_t *out_offset, unsigned num_vector_components,
404 unsigned stride_dwords, unsigned offset_dwords)
405 {
406 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
407 uint32_t offset_bytes = 4 * offset_dwords;
408 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
409 offset_bytes,
410 buffer_obj->Size - offset_bytes);
411 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
412 out_offset);
413 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
414 size_t size_dwords = buffer_obj->Size / 4;
415 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
416
417 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
418 * too big to map using a single binding table entry?
419 */
420 assert((size_dwords - offset_dwords) / stride_dwords
421 <= BRW_MAX_NUM_BUFFER_ENTRIES);
422
423 if (size_dwords > offset_dwords + num_vector_components) {
424 /* There is room for at least 1 transform feedback output in the buffer.
425 * Compute the number of additional transform feedback outputs the
426 * buffer has room for.
427 */
428 buffer_size_minus_1 =
429 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
430 } else {
431 /* There isn't even room for a single transform feedback output in the
432 * buffer. We can't configure the binding table entry to prevent output
433 * entirely; we'll have to rely on the geometry shader to detect
434 * overflow. But to minimize the damage in case of a bug, set up the
435 * binding table entry to just allow a single output.
436 */
437 buffer_size_minus_1 = 0;
438 }
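/* buffer_size_minus_1 (a count of additional transform feedback outputs) is
 * spread across the Width (bits 6:0), Height (bits 19:7) and Depth
 * (bits 26:20) surface fields, just like an ordinary buffer surface.
 */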
439 width = buffer_size_minus_1 & 0x7f;
440 height = (buffer_size_minus_1 & 0xfff80) >> 7;
441 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
442
443 switch (num_vector_components) {
444 case 1:
445 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
446 break;
447 case 2:
448 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
449 break;
450 case 3:
451 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
452 break;
453 case 4:
454 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
455 break;
456 default:
457 unreachable("Invalid vector size for transform feedback output");
458 }
459
460 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
461 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
462 surface_format << BRW_SURFACE_FORMAT_SHIFT |
463 BRW_SURFACE_RC_READ_WRITE;
464 surf[1] = bo->offset64 + offset_bytes; /* reloc */
465 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
466 height << BRW_SURFACE_HEIGHT_SHIFT);
467 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
468 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
469 surf[4] = 0;
470 surf[5] = 0;
471
472 /* Emit relocation to surface contents. */
473 drm_intel_bo_emit_reloc(brw->batch.bo,
474 *out_offset + 4,
475 bo, offset_bytes,
476 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
477 }
478
479 /* Creates a new WM constant buffer reflecting the current fragment program's
480 * constants, if needed by the fragment program.
481 *
482 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
483 * state atom.
484 */
485 static void
486 brw_upload_wm_pull_constants(struct brw_context *brw)
487 {
488 struct brw_stage_state *stage_state = &brw->wm.base;
489 /* BRW_NEW_FRAGMENT_PROGRAM */
490 struct brw_fragment_program *fp =
491 (struct brw_fragment_program *) brw->fragment_program;
492 /* BRW_NEW_FS_PROG_DATA */
493 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
494
495 /* _NEW_PROGRAM_CONSTANTS */
496 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
497 stage_state, prog_data, true);
498 }
499
500 const struct brw_tracked_state brw_wm_pull_constants = {
501 .dirty = {
502 .mesa = _NEW_PROGRAM_CONSTANTS,
503 .brw = BRW_NEW_BATCH |
504 BRW_NEW_FRAGMENT_PROGRAM |
505 BRW_NEW_FS_PROG_DATA,
506 },
507 .emit = brw_upload_wm_pull_constants,
508 };
509
510 /**
511 * Creates a null renderbuffer surface.
512 *
513 * This is used when the shader doesn't write to any color output. An FB
514 * write to target 0 will still be emitted, because that's how the thread is
515 * terminated (and computed depth is returned), so we need to have the
516 * hardware discard the target 0 color output.
517 */
518 static void
519 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
520 {
521 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
522 * Notes):
523 *
524 * A null surface will be used in instances where an actual surface is
525 * not bound. When a write message is generated to a null surface, no
526 * actual surface is written to. When a read message (including any
527 * sampling engine message) is generated to a null surface, the result
528 * is all zeros. Note that a null surface type is allowed to be used
529 * with all messages, even if it is not specifically indicated as
530 * supported. All of the remaining fields in surface state are ignored
531 * for null surfaces, with the following exceptions:
532 *
533 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
534 * depth buffer’s corresponding state for all render target surfaces,
535 * including null.
536 *
537 * - Surface Format must be R8G8B8A8_UNORM.
538 */
539 struct gl_context *ctx = &brw->ctx;
540 uint32_t *surf;
541 unsigned surface_type = BRW_SURFACE_NULL;
542 drm_intel_bo *bo = NULL;
543 unsigned pitch_minus_1 = 0;
544 uint32_t multisampling_state = 0;
545 /* BRW_NEW_FS_PROG_DATA */
546 uint32_t surf_index =
547 brw->wm.prog_data->binding_table.render_target_start + unit;
548
549 /* _NEW_BUFFERS */
550 const struct gl_framebuffer *fb = ctx->DrawBuffer;
551
552 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
553 &brw->wm.base.surf_offset[surf_index]);
554
555 if (fb->Visual.samples > 1) {
556 /* On Gen6, null render targets seem to cause GPU hangs when
557 * multisampling. So work around this problem by rendering into dummy
558 * color buffer.
559 *
560 * To decrease the amount of memory needed by the workaround buffer, we
561 * set its pitch to 128 bytes (the width of a Y tile). This means that
562 * the amount of memory needed for the workaround buffer is
563 * (width_in_tiles + height_in_tiles - 1) tiles.
564 *
565 * Note that since the workaround buffer will be interpreted by the
566 * hardware as an interleaved multisampled buffer, we need to compute
567 * width_in_tiles and height_in_tiles by dividing the width and height
568 * by 16 rather than the normal Y-tile size of 32.
569 */
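/* For example (hypothetical numbers): a 1920x1080 multisampled framebuffer
 * gives width_in_tiles = 120 and height_in_tiles = 68, so the workaround
 * buffer needs (120 + 68 - 1) * 4096 bytes, roughly 748 KiB.
 */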
570 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
571 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
572 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
573 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
574 size_needed);
575 bo = brw->wm.multisampled_null_render_target_bo;
576 surface_type = BRW_SURFACE_2D;
577 pitch_minus_1 = 127;
578 multisampling_state =
579 brw_get_surface_num_multisamples(fb->Visual.samples);
580 }
581
582 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
583 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
584 if (brw->gen < 6) {
585 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
586 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
587 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
588 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
589 }
590 surf[1] = bo ? bo->offset64 : 0;
591 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
592 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
593
594 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
595 * Notes):
596 *
597 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
598 */
599 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
600 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
601 surf[4] = multisampling_state;
602 surf[5] = 0;
603
604 if (bo) {
605 drm_intel_bo_emit_reloc(brw->batch.bo,
606 brw->wm.base.surf_offset[surf_index] + 4,
607 bo, 0,
608 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
609 }
610 }
611
612 /**
613 * Sets up a surface state structure to point at the given region.
614 * While it is only used for the front/back buffer currently, it should be
615 * usable for further buffers when doing ARB_draw_buffers support.
616 */
617 static void
618 brw_update_renderbuffer_surface(struct brw_context *brw,
619 struct gl_renderbuffer *rb,
620 bool layered,
621 unsigned int unit)
622 {
623 struct gl_context *ctx = &brw->ctx;
624 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
625 struct intel_mipmap_tree *mt = irb->mt;
626 uint32_t *surf;
627 uint32_t tile_x, tile_y;
628 uint32_t format = 0;
629 /* _NEW_BUFFERS */
630 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
631 /* BRW_NEW_FS_PROG_DATA */
632 uint32_t surf_index =
633 brw->wm.prog_data->binding_table.render_target_start + unit;
634
635 assert(!layered);
636
637 if (rb->TexImage && !brw->has_surface_tile_offset) {
638 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
639
640 if (tile_x != 0 || tile_y != 0) {
641 /* Original gen4 hardware couldn't draw to a non-tile-aligned
642 * destination in a miptree unless you actually set up your renderbuffer
643 * as a miptree and used the fragile lod/array_index/etc. controls to
644 * select the image. So, instead, we just make a new single-level
645 * miptree and render into that.
646 */
647 intel_renderbuffer_move_to_temp(brw, irb, false);
648 mt = irb->mt;
649 }
650 }
651
652 intel_miptree_used_for_rendering(irb->mt);
653
654 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
655 &brw->wm.base.surf_offset[surf_index]);
656
657 format = brw->render_target_format[rb_format];
658 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
659 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
660 __FUNCTION__, _mesa_get_format_name(rb_format));
661 }
662
663 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
664 format << BRW_SURFACE_FORMAT_SHIFT);
665
666 /* reloc */
667 assert(mt->offset % mt->cpp == 0);
668 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
669 mt->bo->offset64 + mt->offset);
670
671 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
672 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
673
674 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
675 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
676
677 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
678
679 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
680 /* Note that the low bits of these fields are missing, so
681 * there's the possibility of getting in trouble.
682 */
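/* The surface X offset field is expressed in units of 4 pixels and the
 * Y offset in units of 2 rows, hence the divisions (and the alignment
 * asserts) below.
 */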
683 assert(tile_x % 4 == 0);
684 assert(tile_y % 2 == 0);
685 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
686 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
687 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
688
689 if (brw->gen < 6) {
690 /* _NEW_COLOR */
691 if (!ctx->Color.ColorLogicOpEnabled &&
692 (ctx->Color.BlendEnabled & (1 << unit)))
693 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
694
695 if (!ctx->Color.ColorMask[unit][0])
696 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
697 if (!ctx->Color.ColorMask[unit][1])
698 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
699 if (!ctx->Color.ColorMask[unit][2])
700 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
701
702 /* Disable writes to the alpha component when the renderbuffer is XRGB
703 * (has no alpha bits) or alpha writes are masked off.
704 */
705 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
706 !ctx->Color.ColorMask[unit][3]) {
707 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
708 }
709 }
710
711 drm_intel_bo_emit_reloc(brw->batch.bo,
712 brw->wm.base.surf_offset[surf_index] + 4,
713 mt->bo,
714 surf[1] - mt->bo->offset64,
715 I915_GEM_DOMAIN_RENDER,
716 I915_GEM_DOMAIN_RENDER);
717 }
718
719 /**
720 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
721 */
722 static void
723 brw_update_renderbuffer_surfaces(struct brw_context *brw)
724 {
725 struct gl_context *ctx = &brw->ctx;
726 GLuint i;
727
728 /* _NEW_BUFFERS | _NEW_COLOR */
729 /* Update surfaces for drawing buffers */
730 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
731 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
732 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
733 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
734 ctx->DrawBuffer->MaxNumLayers > 0, i);
735 } else {
736 brw->vtbl.update_null_renderbuffer_surface(brw, i);
737 }
738 }
739 } else {
740 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
741 }
742 brw->state.dirty.brw |= BRW_NEW_SURFACES;
743 }
744
745 const struct brw_tracked_state brw_renderbuffer_surfaces = {
746 .dirty = {
747 .mesa = _NEW_BUFFERS |
748 _NEW_COLOR,
749 .brw = BRW_NEW_BATCH |
750 BRW_NEW_FS_PROG_DATA,
751 },
752 .emit = brw_update_renderbuffer_surfaces,
753 };
754
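/* On gen6+ the blend enables and color write masks live in CC/BLEND state
 * rather than in SURFACE_STATE, so this variant does not need to listen
 * to _NEW_COLOR.
 */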
755 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
756 .dirty = {
757 .mesa = _NEW_BUFFERS,
758 .brw = BRW_NEW_BATCH,
759 },
760 .emit = brw_update_renderbuffer_surfaces,
761 };
762
763
764 static void
765 update_stage_texture_surfaces(struct brw_context *brw,
766 const struct gl_program *prog,
767 struct brw_stage_state *stage_state,
768 bool for_gather)
769 {
770 if (!prog)
771 return;
772
773 struct gl_context *ctx = &brw->ctx;
774
775 uint32_t *surf_offset = stage_state->surf_offset;
776
777 /* BRW_NEW_*_PROG_DATA */
778 if (for_gather)
779 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
780 else
781 surf_offset += stage_state->prog_data->binding_table.texture_start;
782
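/* _mesa_fls() returns one plus the index of the highest bit set (0 if no
 * bits are set), i.e. one past the last sampler in use, so the loop below
 * also clears surf_offset entries for any unused samplers below that.
 */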
783 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
784 for (unsigned s = 0; s < num_samplers; s++) {
785 surf_offset[s] = 0;
786
787 if (prog->SamplersUsed & (1 << s)) {
788 const unsigned unit = prog->SamplerUnits[s];
789
790 /* _NEW_TEXTURE */
791 if (ctx->Texture.Unit[unit]._Current) {
792 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
793 }
794 }
795 }
796 }
797
798
799 /**
800 * Construct SURFACE_STATE objects for enabled textures.
801 */
802 static void
803 brw_update_texture_surfaces(struct brw_context *brw)
804 {
805 /* BRW_NEW_VERTEX_PROGRAM */
806 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
807
808 /* BRW_NEW_GEOMETRY_PROGRAM */
809 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
810
811 /* BRW_NEW_FRAGMENT_PROGRAM */
812 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
813
814 /* _NEW_TEXTURE */
815 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
816 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
817 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
818
819 /* Emit an alternate set of surface state for gather. This
820 * allows the surface format to be overridden for only the
821 * gather4 messages. */
822 if (brw->gen < 8) {
823 if (vs && vs->UsesGather)
824 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
825 if (gs && gs->UsesGather)
826 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
827 if (fs && fs->UsesGather)
828 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
829 }
830
831 brw->state.dirty.brw |= BRW_NEW_SURFACES;
832 }
833
834 const struct brw_tracked_state brw_texture_surfaces = {
835 .dirty = {
836 .mesa = _NEW_TEXTURE,
837 .brw = BRW_NEW_BATCH |
838 BRW_NEW_FRAGMENT_PROGRAM |
839 BRW_NEW_FS_PROG_DATA |
840 BRW_NEW_GEOMETRY_PROGRAM |
841 BRW_NEW_GS_PROG_DATA |
842 BRW_NEW_TEXTURE_BUFFER |
843 BRW_NEW_VERTEX_PROGRAM |
844 BRW_NEW_VS_PROG_DATA,
845 },
846 .emit = brw_update_texture_surfaces,
847 };
848
849 void
850 brw_upload_ubo_surfaces(struct brw_context *brw,
851 struct gl_shader *shader,
852 struct brw_stage_state *stage_state,
853 struct brw_stage_prog_data *prog_data,
854 bool dword_pitch)
855 {
856 struct gl_context *ctx = &brw->ctx;
857
858 if (!shader)
859 return;
860
861 uint32_t *surf_offsets =
862 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
863
864 for (int i = 0; i < shader->NumUniformBlocks; i++) {
865 struct gl_uniform_buffer_binding *binding;
866 struct intel_buffer_object *intel_bo;
867
868 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
869 intel_bo = intel_buffer_object(binding->BufferObject);
870 drm_intel_bo *bo =
871 intel_bufferobj_buffer(brw, intel_bo,
872 binding->Offset,
873 binding->BufferObject->Size - binding->Offset);
874
875 /* Because behavior for referencing outside of the binding's size in the
876 * glBindBufferRange case is undefined, we can just bind the whole buffer
877 * glBindBufferBase wants and be a correct implementation.
878 */
879 brw_create_constant_surface(brw, bo, binding->Offset,
880 bo->size - binding->Offset,
881 &surf_offsets[i],
882 dword_pitch);
883 }
884
885 if (shader->NumUniformBlocks)
886 brw->state.dirty.brw |= BRW_NEW_SURFACES;
887 }
888
889 static void
890 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
891 {
892 struct gl_context *ctx = &brw->ctx;
893 /* _NEW_PROGRAM */
894 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
895
896 if (!prog)
897 return;
898
899 /* BRW_NEW_FS_PROG_DATA */
900 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
901 &brw->wm.base, &brw->wm.prog_data->base, true);
902 }
903
904 const struct brw_tracked_state brw_wm_ubo_surfaces = {
905 .dirty = {
906 .mesa = _NEW_PROGRAM,
907 .brw = BRW_NEW_BATCH |
908 BRW_NEW_FS_PROG_DATA |
909 BRW_NEW_UNIFORM_BUFFER,
910 },
911 .emit = brw_upload_wm_ubo_surfaces,
912 };
913
914 void
915 brw_upload_abo_surfaces(struct brw_context *brw,
916 struct gl_shader_program *prog,
917 struct brw_stage_state *stage_state,
918 struct brw_stage_prog_data *prog_data)
919 {
920 struct gl_context *ctx = &brw->ctx;
921 uint32_t *surf_offsets =
922 &stage_state->surf_offset[prog_data->binding_table.abo_start];
923
924 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
925 struct gl_atomic_buffer_binding *binding =
926 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
927 struct intel_buffer_object *intel_bo =
928 intel_buffer_object(binding->BufferObject);
929 drm_intel_bo *bo = intel_bufferobj_buffer(
930 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
931
932 brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
933 bo->size - binding->Offset,
934 &surf_offsets[i], true);
935 }
936
937 if (prog->NumAtomicBuffers)
938 brw->state.dirty.brw |= BRW_NEW_SURFACES;
939 }
940
941 static void
942 brw_upload_wm_abo_surfaces(struct brw_context *brw)
943 {
944 struct gl_context *ctx = &brw->ctx;
945 /* _NEW_PROGRAM */
946 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
947
948 if (prog) {
949 /* BRW_NEW_FS_PROG_DATA */
950 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
951 &brw->wm.prog_data->base);
952 }
953 }
954
955 const struct brw_tracked_state brw_wm_abo_surfaces = {
956 .dirty = {
957 .mesa = _NEW_PROGRAM,
958 .brw = BRW_NEW_ATOMIC_BUFFER |
959 BRW_NEW_BATCH |
960 BRW_NEW_FS_PROG_DATA,
961 },
962 .emit = brw_upload_wm_abo_surfaces,
963 };
964
965 void
966 gen4_init_vtable_surface_functions(struct brw_context *brw)
967 {
968 brw->vtbl.update_texture_surface = brw_update_texture_surface;
969 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
970 brw->vtbl.update_null_renderbuffer_surface =
971 brw_update_null_renderbuffer_surface;
972 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
973 }