src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       unreachable("not reached");
  77    }
  78 }
  79
  80 uint32_t
  81 brw_get_surface_tiling_bits(uint32_t tiling)
  82 {
  83    switch (tiling) {
  84    case I915_TILING_X:
  85       return BRW_SURFACE_TILED;
  86    case I915_TILING_Y:
  87       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  88    default:
  89       return 0;
  90    }
  91 }
  92
  93
  94 uint32_t
  95 brw_get_surface_num_multisamples(unsigned num_samples)
  96 {
  97    if (num_samples > 1)
  98       return BRW_SURFACE_MULTISAMPLECOUNT_4;
  99    else
 100       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 101 }
 102
 103 void
 104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
 105                       bool is_render_target,
 106                       unsigned *width, unsigned *height,
 107                       unsigned *pitch, uint32_t *tiling, unsigned *format)
 108 {
 109    static const unsigned halign_stencil = 8;
 110
 111    /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
 112     * there are half as many rows.
 113     * In addition, mip-levels are accessed manually by the program and
 114     * therefore the surface is setup to cover all the mip-levels for one slice.
 115     * (Hardware is still used to access individual slices).
 116     */
 117    *tiling = I915_TILING_Y;
 118    *pitch = mt->pitch * 2;
 119    *width = ALIGN(mt->total_width, halign_stencil) * 2;
 120    *height = (mt->total_height / mt->physical_depth0) / 2;
 121
 122    if (is_render_target) {
 123       *format = BRW_SURFACEFORMAT_R8_UINT;
 124    }
 125 }
 126
 127
 128 /**
 129  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 130  * swizzling.
 131  */
 132 int
 133 brw_get_texture_swizzle(const struct gl_context *ctx,
 134                         const struct gl_texture_object *t)
 135 {
 136    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 137
 138    int swizzles[SWIZZLE_NIL + 1] = {
 139       SWIZZLE_X,
 140       SWIZZLE_Y,
 141       SWIZZLE_Z,
 142       SWIZZLE_W,
 143       SWIZZLE_ZERO,
 144       SWIZZLE_ONE,
 145       SWIZZLE_NIL
 146    };
 147
 148    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 149        img->_BaseFormat == GL_DEPTH_STENCIL) {
 150       GLenum depth_mode = t->DepthMode;
 151
 152       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 153        * with depth component data specified with a sized internal format.
 154        * Otherwise, it's left at the old default, GL_LUMINANCE.
 155        */
 156       if (_mesa_is_gles3(ctx) &&
 157           img->InternalFormat != GL_DEPTH_COMPONENT &&
 158           img->InternalFormat != GL_DEPTH_STENCIL) {
 159          depth_mode = GL_RED;
 160       }
 161
 162       switch (depth_mode) {
 163       case GL_ALPHA:
 164          swizzles[0] = SWIZZLE_ZERO;
 165          swizzles[1] = SWIZZLE_ZERO;
 166          swizzles[2] = SWIZZLE_ZERO;
 167          swizzles[3] = SWIZZLE_X;
 168          break;
 169       case GL_LUMINANCE:
 170          swizzles[0] = SWIZZLE_X;
 171          swizzles[1] = SWIZZLE_X;
 172          swizzles[2] = SWIZZLE_X;
 173          swizzles[3] = SWIZZLE_ONE;
 174          break;
 175       case GL_INTENSITY:
 176          swizzles[0] = SWIZZLE_X;
 177          swizzles[1] = SWIZZLE_X;
 178          swizzles[2] = SWIZZLE_X;
 179          swizzles[3] = SWIZZLE_X;
 180          break;
 181       case GL_RED:
 182          swizzles[0] = SWIZZLE_X;
 183          swizzles[1] = SWIZZLE_ZERO;
 184          swizzles[2] = SWIZZLE_ZERO;
 185          swizzles[3] = SWIZZLE_ONE;
 186          break;
 187       }
 188    }
 189
 190    /* If the texture's format is alpha-only, force R, G, and B to
 191     * 0.0. Similarly, if the texture's format has no alpha channel,
 192     * force the alpha value read to 1.0. This allows for the
 193     * implementation to use an RGBA texture for any of these formats
 194     * without leaking any unexpected values.
 195     */
 196    switch (img->_BaseFormat) {
 197    case GL_ALPHA:
 198       swizzles[0] = SWIZZLE_ZERO;
 199       swizzles[1] = SWIZZLE_ZERO;
 200       swizzles[2] = SWIZZLE_ZERO;
 201       break;
 202    case GL_RED:
 203    case GL_RG:
 204    case GL_RGB:
 205       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 206          swizzles[3] = SWIZZLE_ONE;
 207       break;
 208    }
 209
 210    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 211                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 212                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 213                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 214 }
 215
 216 static void
 217 gen4_emit_buffer_surface_state(struct brw_context *brw,
 218                                uint32_t *out_offset,
 219                                drm_intel_bo *bo,
 220                                unsigned buffer_offset,
 221                                unsigned surface_format,
 222                                unsigned buffer_size,
 223                                unsigned pitch,
 224                                bool rw)
 225 {
 226    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 227                                     6 * 4, 32, out_offset);
 228    memset(surf, 0, 6 * 4);
 229
 230    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 231              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 232              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 233    surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
 234    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 235              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 236    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 237              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 238
 239    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 240     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 241     * physical cache.  It is mapped in hardware to the sampler cache."
 242     */
 243    if (bo) {
 244       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 245                               bo, buffer_offset,
 246                               I915_GEM_DOMAIN_SAMPLER,
 247                               (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
 248    }
 249 }
 250
 251 void
 252 brw_update_buffer_texture_surface(struct gl_context *ctx,
 253                                   unsigned unit,
 254                                   uint32_t *surf_offset)
 255 {
 256    struct brw_context *brw = brw_context(ctx);
 257    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 258    struct intel_buffer_object *intel_obj =
 259       intel_buffer_object(tObj->BufferObject);
 260    uint32_t size = tObj->BufferSize;
 261    drm_intel_bo *bo = NULL;
 262    mesa_format format = tObj->_BufferObjectFormat;
 263    uint32_t brw_format = brw_format_for_mesa_format(format);
 264    int texel_size = _mesa_get_format_bytes(format);
 265
 266    if (intel_obj) {
 267       size = MIN2(size, intel_obj->Base.Size);
 268       bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
 269    }
 270
 271    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 272       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 273                     _mesa_get_format_name(format));
 274    }
 275
 276    brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
 277                                        tObj->BufferOffset,
 278                                        brw_format,
 279                                        size / texel_size,
 280                                        texel_size,
 281                                        false /* rw */);
 282 }
 283
 284 static void
 285 brw_update_texture_surface(struct gl_context *ctx,
 286                            unsigned unit,
 287                            uint32_t *surf_offset,
 288                            bool for_gather)
 289 {
 290    struct brw_context *brw = brw_context(ctx);
 291    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 292    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 293    struct intel_mipmap_tree *mt = intelObj->mt;
 294    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 295    uint32_t *surf;
 296
 297    /* BRW_NEW_TEXTURE_BUFFER */
 298    if (tObj->Target == GL_TEXTURE_BUFFER) {
 299       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 300       return;
 301    }
 302
 303    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 304                           6 * 4, 32, surf_offset);
 305
 306    uint32_t tex_format = translate_tex_format(brw, mt->format,
 307                                               sampler->sRGBDecode);
 308
 309    if (for_gather) {
 310       /* Sandybridge's gather4 message is broken for integer formats.
 311        * To work around this, we pretend the surface is UNORM for
 312        * 8 or 16-bit formats, and emit shader instructions to recover
 313        * the real INT/UINT value.  For 32-bit formats, we pretend
 314        * the surface is FLOAT, and simply reinterpret the resulting
 315        * bits.
 316        */
 317       switch (tex_format) {
 318       case BRW_SURFACEFORMAT_R8_SINT:
 319       case BRW_SURFACEFORMAT_R8_UINT:
 320          tex_format = BRW_SURFACEFORMAT_R8_UNORM;
 321          break;
 322
 323       case BRW_SURFACEFORMAT_R16_SINT:
 324       case BRW_SURFACEFORMAT_R16_UINT:
 325          tex_format = BRW_SURFACEFORMAT_R16_UNORM;
 326          break;
 327
 328       case BRW_SURFACEFORMAT_R32_SINT:
 329       case BRW_SURFACEFORMAT_R32_UINT:
 330          tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
 331          break;
 332
 333       default:
 334          break;
 335       }
 336    }
 337
 338    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 339               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 340               BRW_SURFACE_CUBEFACE_ENABLES |
 341               tex_format << BRW_SURFACE_FORMAT_SHIFT);
 342
 343    surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
 344
 345    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 346               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 347               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 348
 349    surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
 350               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 351               (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 352
 353    surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
 354               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 355
 356    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 357
 358    /* Emit relocation to surface contents */
 359    drm_intel_bo_emit_reloc(brw->batch.bo,
 360                            *surf_offset + 4,
 361                            mt->bo,
 362                            surf[1] - mt->bo->offset64,
 363                            I915_GEM_DOMAIN_SAMPLER, 0);
 364 }
 365
 366 /**
 367  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 368  * read from this buffer with Data Port Read instructions/messages.
 369  */
 370 void
 371 brw_create_constant_surface(struct brw_context *brw,
 372                             drm_intel_bo *bo,
 373                             uint32_t offset,
 374                             uint32_t size,
 375                             uint32_t *out_offset,
 376                             bool dword_pitch)
 377 {
 378    uint32_t stride = dword_pitch ? 4 : 16;
 379    uint32_t elements = ALIGN(size, stride) / stride;
 380
 381    brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
 382                                        BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 383                                        elements, stride, false);
 384 }
 385
 386 /**
 387  * Set up a binding table entry for use by stream output logic (transform
 388  * feedback).
 389  *
 390  * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 391  */
 392 void
 393 brw_update_sol_surface(struct brw_context *brw,
 394                        struct gl_buffer_object *buffer_obj,
 395                        uint32_t *out_offset, unsigned num_vector_components,
 396                        unsigned stride_dwords, unsigned offset_dwords)
 397 {
 398    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 399    uint32_t offset_bytes = 4 * offset_dwords;
 400    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
 401                                              offset_bytes,
 402                                              buffer_obj->Size - offset_bytes);
 403    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 404                                     out_offset);
 405    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 406    size_t size_dwords = buffer_obj->Size / 4;
 407    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 408
 409    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 410     * too big to map using a single binding table entry?
 411     */
 412    assert((size_dwords - offset_dwords) / stride_dwords
 413           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 414
 415    if (size_dwords > offset_dwords + num_vector_components) {
 416       /* There is room for at least 1 transform feedback output in the buffer.
 417        * Compute the number of additional transform feedback outputs the
 418        * buffer has room for.
 419        */
 420       buffer_size_minus_1 =
 421          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 422    } else {
 423       /* There isn't even room for a single transform feedback output in the
 424        * buffer.  We can't configure the binding table entry to prevent output
 425        * entirely; we'll have to rely on the geometry shader to detect
 426        * overflow.  But to minimize the damage in case of a bug, set up the
 427        * binding table entry to just allow a single output.
 428        */
 429       buffer_size_minus_1 = 0;
 430    }
 431    width = buffer_size_minus_1 & 0x7f;
 432    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 433    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 434
 435    switch (num_vector_components) {
 436    case 1:
 437       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 438       break;
 439    case 2:
 440       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 441       break;
 442    case 3:
 443       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 444       break;
 445    case 4:
 446       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 447       break;
 448    default:
 449       unreachable("Invalid vector size for transform feedback output");
 450    }
 451
 452    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 453       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 454       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 455       BRW_SURFACE_RC_READ_WRITE;
 456    surf[1] = bo->offset64 + offset_bytes; /* reloc */
 457    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 458               height << BRW_SURFACE_HEIGHT_SHIFT);
 459    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 460               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 461    surf[4] = 0;
 462    surf[5] = 0;
 463
 464    /* Emit relocation to surface contents. */
 465    drm_intel_bo_emit_reloc(brw->batch.bo,
 466                            *out_offset + 4,
 467                            bo, offset_bytes,
 468                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 469 }
 470
 471 /* Creates a new WM constant buffer reflecting the current fragment program's
 472  * constants, if needed by the fragment program.
 473  *
 474  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 475  * state atom.
 476  */
 477 static void
 478 brw_upload_wm_pull_constants(struct brw_context *brw)
 479 {
 480    struct brw_stage_state *stage_state = &brw->wm.base;
 481    /* BRW_NEW_FRAGMENT_PROGRAM */
 482    struct brw_fragment_program *fp =
 483       (struct brw_fragment_program *) brw->fragment_program;
 484    /* BRW_NEW_FS_PROG_DATA */
 485    struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
 486
 487    /* _NEW_PROGRAM_CONSTANTS */
 488    brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
 489                              stage_state, prog_data, true);
 490 }
 491
 492 const struct brw_tracked_state brw_wm_pull_constants = {
 493    .dirty = {
 494       .mesa = _NEW_PROGRAM_CONSTANTS,
 495       .brw = BRW_NEW_BATCH |
 496              BRW_NEW_FRAGMENT_PROGRAM |
 497              BRW_NEW_FS_PROG_DATA,
 498    },
 499    .emit = brw_upload_wm_pull_constants,
 500 };
 501
 502 /**
 503  * Creates a null renderbuffer surface.
 504  *
 505  * This is used when the shader doesn't write to any color output.  An FB
 506  * write to target 0 will still be emitted, because that's how the thread is
 507  * terminated (and computed depth is returned), so we need to have the
 508  * hardware discard the target 0 color output..
 509  */
 510 static void
 511 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 512 {
 513    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 514     * Notes):
 515     *
 516     *     A null surface will be used in instances where an actual surface is
 517     *     not bound. When a write message is generated to a null surface, no
 518     *     actual surface is written to. When a read message (including any
 519     *     sampling engine message) is generated to a null surface, the result
 520     *     is all zeros. Note that a null surface type is allowed to be used
 521     *     with all messages, even if it is not specificially indicated as
 522     *     supported. All of the remaining fields in surface state are ignored
 523     *     for null surfaces, with the following exceptions:
 524     *
 525     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 526     *       depth buffer’s corresponding state for all render target surfaces,
 527     *       including null.
 528     *
 529     *     - Surface Format must be R8G8B8A8_UNORM.
 530     */
 531    struct gl_context *ctx = &brw->ctx;
 532    uint32_t *surf;
 533    unsigned surface_type = BRW_SURFACE_NULL;
 534    drm_intel_bo *bo = NULL;
 535    unsigned pitch_minus_1 = 0;
 536    uint32_t multisampling_state = 0;
 537    /* BRW_NEW_FS_PROG_DATA */
 538    uint32_t surf_index =
 539       brw->wm.prog_data->binding_table.render_target_start + unit;
 540
 541    /* _NEW_BUFFERS */
 542    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 543
 544    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 545                           &brw->wm.base.surf_offset[surf_index]);
 546
 547    if (fb->Visual.samples > 1) {
 548       /* On Gen6, null render targets seem to cause GPU hangs when
 549        * multisampling.  So work around this problem by rendering into dummy
 550        * color buffer.
 551        *
 552        * To decrease the amount of memory needed by the workaround buffer, we
 553        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 554        * the amount of memory needed for the workaround buffer is
 555        * (width_in_tiles + height_in_tiles - 1) tiles.
 556        *
 557        * Note that since the workaround buffer will be interpreted by the
 558        * hardware as an interleaved multisampled buffer, we need to compute
 559        * width_in_tiles and height_in_tiles by dividing the width and height
 560        * by 16 rather than the normal Y-tile size of 32.
 561        */
 562       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 563       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 564       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 565       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 566                          size_needed);
 567       bo = brw->wm.multisampled_null_render_target_bo;
 568       surface_type = BRW_SURFACE_2D;
 569       pitch_minus_1 = 127;
 570       multisampling_state =
 571          brw_get_surface_num_multisamples(fb->Visual.samples);
 572    }
 573
 574    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 575               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 576    if (brw->gen < 6) {
 577       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 578                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 579                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 580                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 581    }
 582    surf[1] = bo ? bo->offset64 : 0;
 583    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 584               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 585
 586    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 587     * Notes):
 588     *
 589     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 590     */
 591    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 592               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 593    surf[4] = multisampling_state;
 594    surf[5] = 0;
 595
 596    if (bo) {
 597       drm_intel_bo_emit_reloc(brw->batch.bo,
 598                               brw->wm.base.surf_offset[surf_index] + 4,
 599                               bo, 0,
 600                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 601    }
 602 }
 603
 604 /**
 605  * Sets up a surface state structure to point at the given region.
 606  * While it is only used for the front/back buffer currently, it should be
 607  * usable for further buffers when doing ARB_draw_buffer support.
 608  */
 609 static void
 610 brw_update_renderbuffer_surface(struct brw_context *brw,
 611                                 struct gl_renderbuffer *rb,
 612                                 bool layered,
 613                                 unsigned int unit)
 614 {
 615    struct gl_context *ctx = &brw->ctx;
 616    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 617    struct intel_mipmap_tree *mt = irb->mt;
 618    uint32_t *surf;
 619    uint32_t tile_x, tile_y;
 620    uint32_t format = 0;
 621    /* _NEW_BUFFERS */
 622    mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 623    /* BRW_NEW_FS_PROG_DATA */
 624    uint32_t surf_index =
 625       brw->wm.prog_data->binding_table.render_target_start + unit;
 626
 627    assert(!layered);
 628
 629    if (rb->TexImage && !brw->has_surface_tile_offset) {
 630       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 631
 632       if (tile_x != 0 || tile_y != 0) {
 633          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 634           * destination in a miptree unless you actually setup your renderbuffer
 635           * as a miptree and used the fragile lod/array_index/etc. controls to
 636           * select the image.  So, instead, we just make a new single-level
 637           * miptree and render into that.
 638           */
 639          intel_renderbuffer_move_to_temp(brw, irb, false);
 640          mt = irb->mt;
 641       }
 642    }
 643
 644    intel_miptree_used_for_rendering(irb->mt);
 645
 646    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 647                           &brw->wm.base.surf_offset[surf_index]);
 648
 649    format = brw->render_target_format[rb_format];
 650    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 651       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 652                     __FUNCTION__, _mesa_get_format_name(rb_format));
 653    }
 654
 655    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 656               format << BRW_SURFACE_FORMAT_SHIFT);
 657
 658    /* reloc */
 659    assert(mt->offset % mt->cpp == 0);
 660    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 661               mt->bo->offset64 + mt->offset);
 662
 663    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 664               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 665
 666    surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
 667               (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 668
 669    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 670
 671    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 672    /* Note that the low bits of these fields are missing, so
 673     * there's the possibility of getting in trouble.
 674     */
 675    assert(tile_x % 4 == 0);
 676    assert(tile_y % 2 == 0);
 677    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 678               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 679               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 680
 681    if (brw->gen < 6) {
 682       /* _NEW_COLOR */
 683       if (!ctx->Color.ColorLogicOpEnabled &&
 684           (ctx->Color.BlendEnabled & (1 << unit)))
 685          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 686
 687       if (!ctx->Color.ColorMask[unit][0])
 688          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 689       if (!ctx->Color.ColorMask[unit][1])
 690          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 691       if (!ctx->Color.ColorMask[unit][2])
 692          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 693
 694       /* As mentioned above, disable writes to the alpha component when the
 695        * renderbuffer is XRGB.
 696        */
 697       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 698           !ctx->Color.ColorMask[unit][3]) {
 699          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 700       }
 701    }
 702
 703    drm_intel_bo_emit_reloc(brw->batch.bo,
 704                            brw->wm.base.surf_offset[surf_index] + 4,
 705                            mt->bo,
 706                            surf[1] - mt->bo->offset64,
 707                            I915_GEM_DOMAIN_RENDER,
 708                            I915_GEM_DOMAIN_RENDER);
 709 }
 710
 711 /**
 712  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 713  */
 714 static void
 715 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 716 {
 717    struct gl_context *ctx = &brw->ctx;
 718    GLuint i;
 719
 720    /* _NEW_BUFFERS | _NEW_COLOR */
 721    /* Update surfaces for drawing buffers */
 722    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 723       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 724          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 725             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 726                                                   ctx->DrawBuffer->MaxNumLayers > 0, i);
 727          } else {
 728             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 729          }
 730       }
 731    } else {
 732       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 733    }
 734    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 735 }
 736
 737 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 738    .dirty = {
 739       .mesa = _NEW_BUFFERS |
 740               _NEW_COLOR,
 741       .brw = BRW_NEW_BATCH |
 742              BRW_NEW_FS_PROG_DATA,
 743    },
 744    .emit = brw_update_renderbuffer_surfaces,
 745 };
 746
 747 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 748    .dirty = {
 749       .mesa = _NEW_BUFFERS,
 750       .brw = BRW_NEW_BATCH,
 751    },
 752    .emit = brw_update_renderbuffer_surfaces,
 753 };
 754
 755
 756 static void
 757 update_stage_texture_surfaces(struct brw_context *brw,
 758                               const struct gl_program *prog,
 759                               struct brw_stage_state *stage_state,
 760                               bool for_gather)
 761 {
 762    if (!prog)
 763       return;
 764
 765    struct gl_context *ctx = &brw->ctx;
 766
 767    uint32_t *surf_offset = stage_state->surf_offset;
 768
 769    /* BRW_NEW_*_PROG_DATA */
 770    if (for_gather)
 771       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
 772    else
 773       surf_offset += stage_state->prog_data->binding_table.texture_start;
 774
 775    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 776    for (unsigned s = 0; s < num_samplers; s++) {
 777       surf_offset[s] = 0;
 778
 779       if (prog->SamplersUsed & (1 << s)) {
 780          const unsigned unit = prog->SamplerUnits[s];
 781
 782          /* _NEW_TEXTURE */
 783          if (ctx->Texture.Unit[unit]._Current) {
 784             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 785          }
 786       }
 787    }
 788 }
 789
 790
 791 /**
 792  * Construct SURFACE_STATE objects for enabled textures.
 793  */
 794 static void
 795 brw_update_texture_surfaces(struct brw_context *brw)
 796 {
 797    /* BRW_NEW_VERTEX_PROGRAM */
 798    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 799
 800    /* BRW_NEW_GEOMETRY_PROGRAM */
 801    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 802
 803    /* BRW_NEW_FRAGMENT_PROGRAM */
 804    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 805
 806    /* _NEW_TEXTURE */
 807    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
 808    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
 809    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
 810
 811    /* emit alternate set of surface state for gather. this
 812     * allows the surface format to be overriden for only the
 813     * gather4 messages. */
 814    if (brw->gen < 8) {
 815       if (vs && vs->UsesGather)
 816          update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
 817       if (gs && gs->UsesGather)
 818          update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
 819       if (fs && fs->UsesGather)
 820          update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
 821    }
 822
 823    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 824 }
 825
 826 const struct brw_tracked_state brw_texture_surfaces = {
 827    .dirty = {
 828       .mesa = _NEW_TEXTURE,
 829       .brw = BRW_NEW_BATCH |
 830              BRW_NEW_FRAGMENT_PROGRAM |
 831              BRW_NEW_FS_PROG_DATA |
 832              BRW_NEW_GEOMETRY_PROGRAM |
 833              BRW_NEW_GS_PROG_DATA |
 834              BRW_NEW_TEXTURE_BUFFER |
 835              BRW_NEW_VERTEX_PROGRAM |
 836              BRW_NEW_VS_PROG_DATA,
 837    },
 838    .emit = brw_update_texture_surfaces,
 839 };
 840
 841 void
 842 brw_upload_ubo_surfaces(struct brw_context *brw,
 843                         struct gl_shader *shader,
 844                         struct brw_stage_state *stage_state,
 845                         struct brw_stage_prog_data *prog_data,
 846                         bool dword_pitch)
 847 {
 848    struct gl_context *ctx = &brw->ctx;
 849
 850    if (!shader)
 851       return;
 852
 853    uint32_t *surf_offsets =
 854       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 855
 856    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 857       struct gl_uniform_buffer_binding *binding;
 858       struct intel_buffer_object *intel_bo;
 859
 860       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 861       intel_bo = intel_buffer_object(binding->BufferObject);
 862       drm_intel_bo *bo =
 863          intel_bufferobj_buffer(brw, intel_bo,
 864                                 binding->Offset,
 865                                 binding->BufferObject->Size - binding->Offset);
 866
 867       /* Because behavior for referencing outside of the binding's size in the
 868        * glBindBufferRange case is undefined, we can just bind the whole buffer
 869        * glBindBufferBase wants and be a correct implementation.
 870        */
 871       brw_create_constant_surface(brw, bo, binding->Offset,
 872                                   bo->size - binding->Offset,
 873                                   &surf_offsets[i],
 874                                   dword_pitch);
 875    }
 876
 877    if (shader->NumUniformBlocks)
 878       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 879 }
 880
 881 static void
 882 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 883 {
 884    struct gl_context *ctx = &brw->ctx;
 885    /* _NEW_PROGRAM */
 886    struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
 887
 888    if (!prog)
 889       return;
 890
 891    /* BRW_NEW_FS_PROG_DATA */
 892    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 893                            &brw->wm.base, &brw->wm.prog_data->base, true);
 894 }
 895
 896 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 897    .dirty = {
 898       .mesa = _NEW_PROGRAM,
 899       .brw = BRW_NEW_BATCH |
 900              BRW_NEW_FS_PROG_DATA |
 901              BRW_NEW_UNIFORM_BUFFER,
 902    },
 903    .emit = brw_upload_wm_ubo_surfaces,
 904 };
 905
 906 void
 907 brw_upload_abo_surfaces(struct brw_context *brw,
 908                         struct gl_shader_program *prog,
 909                         struct brw_stage_state *stage_state,
 910                         struct brw_stage_prog_data *prog_data)
 911 {
 912    struct gl_context *ctx = &brw->ctx;
 913    uint32_t *surf_offsets =
 914       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 915
 916    for (int i = 0; i < prog->NumAtomicBuffers; i++) {
 917       struct gl_atomic_buffer_binding *binding =
 918          &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
 919       struct intel_buffer_object *intel_bo =
 920          intel_buffer_object(binding->BufferObject);
 921       drm_intel_bo *bo = intel_bufferobj_buffer(
 922          brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
 923
 924       brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
 925                                    bo->size - binding->Offset,
 926                                    &surf_offsets[i], true);
 927    }
 928
 929    if (prog->NumAtomicBuffers)
 930       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 931 }
 932
 933 static void
 934 brw_upload_wm_abo_surfaces(struct brw_context *brw)
 935 {
 936    struct gl_context *ctx = &brw->ctx;
 937    /* _NEW_PROGRAM */
 938    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 939
 940    if (prog) {
 941       /* BRW_NEW_FS_PROG_DATA */
 942       brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
 943                               &brw->wm.prog_data->base);
 944    }
 945 }
 946
 947 const struct brw_tracked_state brw_wm_abo_surfaces = {
 948    .dirty = {
 949       .mesa = _NEW_PROGRAM,
 950       .brw = BRW_NEW_ATOMIC_BUFFER |
 951              BRW_NEW_BATCH |
 952              BRW_NEW_FS_PROG_DATA,
 953    },
 954    .emit = brw_upload_wm_abo_surfaces,
 955 };
 956
 957 void
 958 gen4_init_vtable_surface_functions(struct brw_context *brw)
 959 {
 960    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 961    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 962    brw->vtbl.update_null_renderbuffer_surface =
 963       brw_update_null_renderbuffer_surface;
 964    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 965 }