src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       unreachable("not reached");
  77    }
  78 }
  79
  80 uint32_t
  81 brw_get_surface_tiling_bits(uint32_t tiling)
  82 {
  83    switch (tiling) {
  84    case I915_TILING_X:
  85       return BRW_SURFACE_TILED;
  86    case I915_TILING_Y:
  87       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  88    default:
  89       return 0;
  90    }
  91 }
  92
  93
  94 uint32_t
  95 brw_get_surface_num_multisamples(unsigned num_samples)
  96 {
  97    if (num_samples > 1)
  98       return BRW_SURFACE_MULTISAMPLECOUNT_4;
  99    else
 100       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 101 }
 102
 103 void
 104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
 105                       bool is_render_target,
 106                       unsigned *width, unsigned *height,
 107                       unsigned *pitch, uint32_t *tiling, unsigned *format)
 108 {
 109    static const unsigned halign_stencil = 8;
 110
 111    /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
 112     * there are half as many rows.
 113     * In addition, mip-levels are accessed manually by the program and
 114     * therefore the surface is setup to cover all the mip-levels for one slice.
 115     * (Hardware is still used to access individual slices).
 116     */
 117    *tiling = I915_TILING_Y;
 118    *pitch = mt->pitch * 2;
 119    *width = ALIGN(mt->total_width, halign_stencil) * 2;
 120    *height = (mt->total_height / mt->physical_depth0) / 2;
 121
 122    if (is_render_target) {
 123       *format = BRW_SURFACEFORMAT_R8_UINT;
 124    }
 125 }
 126
 127
 128 /**
 129  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 130  * swizzling.
 131  */
 132 int
 133 brw_get_texture_swizzle(const struct gl_context *ctx,
 134                         const struct gl_texture_object *t)
 135 {
 136    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 137
 138    int swizzles[SWIZZLE_NIL + 1] = {
 139       SWIZZLE_X,
 140       SWIZZLE_Y,
 141       SWIZZLE_Z,
 142       SWIZZLE_W,
 143       SWIZZLE_ZERO,
 144       SWIZZLE_ONE,
 145       SWIZZLE_NIL
 146    };
 147
 148    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 149        img->_BaseFormat == GL_DEPTH_STENCIL) {
 150       GLenum depth_mode = t->DepthMode;
 151
 152       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 153        * with depth component data specified with a sized internal format.
 154        * Otherwise, it's left at the old default, GL_LUMINANCE.
 155        */
 156       if (_mesa_is_gles3(ctx) &&
 157           img->InternalFormat != GL_DEPTH_COMPONENT &&
 158           img->InternalFormat != GL_DEPTH_STENCIL) {
 159          depth_mode = GL_RED;
 160       }
 161
 162       switch (depth_mode) {
 163       case GL_ALPHA:
 164          swizzles[0] = SWIZZLE_ZERO;
 165          swizzles[1] = SWIZZLE_ZERO;
 166          swizzles[2] = SWIZZLE_ZERO;
 167          swizzles[3] = SWIZZLE_X;
 168          break;
 169       case GL_LUMINANCE:
 170          swizzles[0] = SWIZZLE_X;
 171          swizzles[1] = SWIZZLE_X;
 172          swizzles[2] = SWIZZLE_X;
 173          swizzles[3] = SWIZZLE_ONE;
 174          break;
 175       case GL_INTENSITY:
 176          swizzles[0] = SWIZZLE_X;
 177          swizzles[1] = SWIZZLE_X;
 178          swizzles[2] = SWIZZLE_X;
 179          swizzles[3] = SWIZZLE_X;
 180          break;
 181       case GL_RED:
 182          swizzles[0] = SWIZZLE_X;
 183          swizzles[1] = SWIZZLE_ZERO;
 184          swizzles[2] = SWIZZLE_ZERO;
 185          swizzles[3] = SWIZZLE_ONE;
 186          break;
 187       }
 188    }
 189
 190    /* If the texture's format is alpha-only, force R, G, and B to
 191     * 0.0. Similarly, if the texture's format has no alpha channel,
 192     * force the alpha value read to 1.0. This allows for the
 193     * implementation to use an RGBA texture for any of these formats
 194     * without leaking any unexpected values.
 195     */
 196    switch (img->_BaseFormat) {
 197    case GL_ALPHA:
 198       swizzles[0] = SWIZZLE_ZERO;
 199       swizzles[1] = SWIZZLE_ZERO;
 200       swizzles[2] = SWIZZLE_ZERO;
 201       break;
 202    case GL_RED:
 203    case GL_RG:
 204    case GL_RGB:
 205       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 206          swizzles[3] = SWIZZLE_ONE;
 207       break;
 208    }
 209
 210    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 211                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 212                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 213                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 214 }
 215
 216 static void
 217 gen4_emit_buffer_surface_state(struct brw_context *brw,
 218                                uint32_t *out_offset,
 219                                drm_intel_bo *bo,
 220                                unsigned buffer_offset,
 221                                unsigned surface_format,
 222                                unsigned buffer_size,
 223                                unsigned pitch,
 224                                unsigned mocs,
 225                                bool rw)
 226 {
 227    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 228                                     6 * 4, 32, out_offset);
 229    memset(surf, 0, 6 * 4);
 230
 231    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 232              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 233              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 234    surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
 235    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 236              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 237    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 238              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 239
 240    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 241     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 242     * physical cache.  It is mapped in hardware to the sampler cache."
 243     */
 244    if (bo) {
 245       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 246                               bo, buffer_offset,
 247                               I915_GEM_DOMAIN_SAMPLER,
 248                               (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
 249    }
 250 }
 251
 252 void
 253 brw_update_buffer_texture_surface(struct gl_context *ctx,
 254                                   unsigned unit,
 255                                   uint32_t *surf_offset)
 256 {
 257    struct brw_context *brw = brw_context(ctx);
 258    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 259    struct intel_buffer_object *intel_obj =
 260       intel_buffer_object(tObj->BufferObject);
 261    uint32_t size = tObj->BufferSize;
 262    drm_intel_bo *bo = NULL;
 263    mesa_format format = tObj->_BufferObjectFormat;
 264    uint32_t brw_format = brw_format_for_mesa_format(format);
 265    int texel_size = _mesa_get_format_bytes(format);
 266
 267    if (intel_obj) {
 268       size = MIN2(size, intel_obj->Base.Size);
 269       bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
 270    }
 271
 272    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 273       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 274                     _mesa_get_format_name(format));
 275    }
 276
 277    brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
 278                                        tObj->BufferOffset,
 279                                        brw_format,
 280                                        size / texel_size,
 281                                        texel_size,
 282                                        0, /* mocs */
 283                                        false /* rw */);
 284 }
 285
 286 static void
 287 brw_update_texture_surface(struct gl_context *ctx,
 288                            unsigned unit,
 289                            uint32_t *surf_offset,
 290                            bool for_gather)
 291 {
 292    struct brw_context *brw = brw_context(ctx);
 293    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 294    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 295    struct intel_mipmap_tree *mt = intelObj->mt;
 296    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 297    uint32_t *surf;
 298
 299    /* BRW_NEW_TEXTURE_BUFFER */
 300    if (tObj->Target == GL_TEXTURE_BUFFER) {
 301       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 302       return;
 303    }
 304
 305    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 306                           6 * 4, 32, surf_offset);
 307
 308    uint32_t tex_format = translate_tex_format(brw, mt->format,
 309                                               sampler->sRGBDecode);
 310
 311    if (for_gather) {
 312       /* Sandybridge's gather4 message is broken for integer formats.
 313        * To work around this, we pretend the surface is UNORM for
 314        * 8 or 16-bit formats, and emit shader instructions to recover
 315        * the real INT/UINT value.  For 32-bit formats, we pretend
 316        * the surface is FLOAT, and simply reinterpret the resulting
 317        * bits.
 318        */
 319       switch (tex_format) {
 320       case BRW_SURFACEFORMAT_R8_SINT:
 321       case BRW_SURFACEFORMAT_R8_UINT:
 322          tex_format = BRW_SURFACEFORMAT_R8_UNORM;
 323          break;
 324
 325       case BRW_SURFACEFORMAT_R16_SINT:
 326       case BRW_SURFACEFORMAT_R16_UINT:
 327          tex_format = BRW_SURFACEFORMAT_R16_UNORM;
 328          break;
 329
 330       case BRW_SURFACEFORMAT_R32_SINT:
 331       case BRW_SURFACEFORMAT_R32_UINT:
 332          tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
 333          break;
 334
 335       default:
 336          break;
 337       }
 338    }
 339
 340    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 341               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 342               BRW_SURFACE_CUBEFACE_ENABLES |
 343               tex_format << BRW_SURFACE_FORMAT_SHIFT);
 344
 345    surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
 346
 347    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 348               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 349               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 350
 351    surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
 352               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 353               (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 354
 355    surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
 356               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 357
 358    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 359
 360    /* Emit relocation to surface contents */
 361    drm_intel_bo_emit_reloc(brw->batch.bo,
 362                            *surf_offset + 4,
 363                            mt->bo,
 364                            surf[1] - mt->bo->offset64,
 365                            I915_GEM_DOMAIN_SAMPLER, 0);
 366 }
 367
 368 /**
 369  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 370  * read from this buffer with Data Port Read instructions/messages.
 371  */
 372 void
 373 brw_create_constant_surface(struct brw_context *brw,
 374                             drm_intel_bo *bo,
 375                             uint32_t offset,
 376                             uint32_t size,
 377                             uint32_t *out_offset,
 378                             bool dword_pitch)
 379 {
 380    uint32_t stride = dword_pitch ? 4 : 16;
 381    uint32_t elements = ALIGN(size, stride) / stride;
 382
 383    brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
 384                                        BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 385                                        elements, stride, 0, false);
 386 }
 387
 388 /**
 389  * Set up a binding table entry for use by stream output logic (transform
 390  * feedback).
 391  *
 392  * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 393  */
 394 void
 395 brw_update_sol_surface(struct brw_context *brw,
 396                        struct gl_buffer_object *buffer_obj,
 397                        uint32_t *out_offset, unsigned num_vector_components,
 398                        unsigned stride_dwords, unsigned offset_dwords)
 399 {
 400    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 401    uint32_t offset_bytes = 4 * offset_dwords;
 402    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
 403                                              offset_bytes,
 404                                              buffer_obj->Size - offset_bytes);
 405    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 406                                     out_offset);
 407    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 408    size_t size_dwords = buffer_obj->Size / 4;
 409    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 410
 411    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 412     * too big to map using a single binding table entry?
 413     */
 414    assert((size_dwords - offset_dwords) / stride_dwords
 415           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 416
 417    if (size_dwords > offset_dwords + num_vector_components) {
 418       /* There is room for at least 1 transform feedback output in the buffer.
 419        * Compute the number of additional transform feedback outputs the
 420        * buffer has room for.
 421        */
 422       buffer_size_minus_1 =
 423          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 424    } else {
 425       /* There isn't even room for a single transform feedback output in the
 426        * buffer.  We can't configure the binding table entry to prevent output
 427        * entirely; we'll have to rely on the geometry shader to detect
 428        * overflow.  But to minimize the damage in case of a bug, set up the
 429        * binding table entry to just allow a single output.
 430        */
 431       buffer_size_minus_1 = 0;
 432    }
 433    width = buffer_size_minus_1 & 0x7f;
 434    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 435    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 436
 437    switch (num_vector_components) {
 438    case 1:
 439       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 440       break;
 441    case 2:
 442       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 443       break;
 444    case 3:
 445       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 446       break;
 447    case 4:
 448       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 449       break;
 450    default:
 451       unreachable("Invalid vector size for transform feedback output");
 452    }
 453
 454    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 455       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 456       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 457       BRW_SURFACE_RC_READ_WRITE;
 458    surf[1] = bo->offset64 + offset_bytes; /* reloc */
 459    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 460               height << BRW_SURFACE_HEIGHT_SHIFT);
 461    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 462               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 463    surf[4] = 0;
 464    surf[5] = 0;
 465
 466    /* Emit relocation to surface contents. */
 467    drm_intel_bo_emit_reloc(brw->batch.bo,
 468                            *out_offset + 4,
 469                            bo, offset_bytes,
 470                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 471 }
 472
 473 /* Creates a new WM constant buffer reflecting the current fragment program's
 474  * constants, if needed by the fragment program.
 475  *
 476  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 477  * state atom.
 478  */
 479 static void
 480 brw_upload_wm_pull_constants(struct brw_context *brw)
 481 {
 482    struct brw_stage_state *stage_state = &brw->wm.base;
 483    /* BRW_NEW_FRAGMENT_PROGRAM */
 484    struct brw_fragment_program *fp =
 485       (struct brw_fragment_program *) brw->fragment_program;
 486    /* BRW_NEW_FS_PROG_DATA */
 487    struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
 488
 489    /* _NEW_PROGRAM_CONSTANTS */
 490    brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
 491                              stage_state, prog_data, true);
 492 }
 493
 494 const struct brw_tracked_state brw_wm_pull_constants = {
 495    .dirty = {
 496       .mesa = _NEW_PROGRAM_CONSTANTS,
 497       .brw = BRW_NEW_BATCH |
 498              BRW_NEW_FRAGMENT_PROGRAM |
 499              BRW_NEW_FS_PROG_DATA,
 500    },
 501    .emit = brw_upload_wm_pull_constants,
 502 };
 503
 504 /**
 505  * Creates a null renderbuffer surface.
 506  *
 507  * This is used when the shader doesn't write to any color output.  An FB
 508  * write to target 0 will still be emitted, because that's how the thread is
 509  * terminated (and computed depth is returned), so we need to have the
 510  * hardware discard the target 0 color output..
 511  */
 512 static void
 513 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 514 {
 515    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 516     * Notes):
 517     *
 518     *     A null surface will be used in instances where an actual surface is
 519     *     not bound. When a write message is generated to a null surface, no
 520     *     actual surface is written to. When a read message (including any
 521     *     sampling engine message) is generated to a null surface, the result
 522     *     is all zeros. Note that a null surface type is allowed to be used
 523     *     with all messages, even if it is not specificially indicated as
 524     *     supported. All of the remaining fields in surface state are ignored
 525     *     for null surfaces, with the following exceptions:
 526     *
 527     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 528     *       depth buffer’s corresponding state for all render target surfaces,
 529     *       including null.
 530     *
 531     *     - Surface Format must be R8G8B8A8_UNORM.
 532     */
 533    struct gl_context *ctx = &brw->ctx;
 534    uint32_t *surf;
 535    unsigned surface_type = BRW_SURFACE_NULL;
 536    drm_intel_bo *bo = NULL;
 537    unsigned pitch_minus_1 = 0;
 538    uint32_t multisampling_state = 0;
 539    uint32_t surf_index =
 540       brw->wm.prog_data->binding_table.render_target_start + unit;
 541
 542    /* _NEW_BUFFERS */
 543    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 544
 545    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 546                           &brw->wm.base.surf_offset[surf_index]);
 547
 548    if (fb->Visual.samples > 1) {
 549       /* On Gen6, null render targets seem to cause GPU hangs when
 550        * multisampling.  So work around this problem by rendering into dummy
 551        * color buffer.
 552        *
 553        * To decrease the amount of memory needed by the workaround buffer, we
 554        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 555        * the amount of memory needed for the workaround buffer is
 556        * (width_in_tiles + height_in_tiles - 1) tiles.
 557        *
 558        * Note that since the workaround buffer will be interpreted by the
 559        * hardware as an interleaved multisampled buffer, we need to compute
 560        * width_in_tiles and height_in_tiles by dividing the width and height
 561        * by 16 rather than the normal Y-tile size of 32.
 562        */
 563       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 564       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 565       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 566       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 567                          size_needed);
 568       bo = brw->wm.multisampled_null_render_target_bo;
 569       surface_type = BRW_SURFACE_2D;
 570       pitch_minus_1 = 127;
 571       multisampling_state =
 572          brw_get_surface_num_multisamples(fb->Visual.samples);
 573    }
 574
 575    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 576               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 577    if (brw->gen < 6) {
 578       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 579                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 580                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 581                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 582    }
 583    surf[1] = bo ? bo->offset64 : 0;
 584    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 585               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 586
 587    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 588     * Notes):
 589     *
 590     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 591     */
 592    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 593               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 594    surf[4] = multisampling_state;
 595    surf[5] = 0;
 596
 597    if (bo) {
 598       drm_intel_bo_emit_reloc(brw->batch.bo,
 599                               brw->wm.base.surf_offset[surf_index] + 4,
 600                               bo, 0,
 601                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 602    }
 603 }
 604
 605 /**
 606  * Sets up a surface state structure to point at the given region.
 607  * While it is only used for the front/back buffer currently, it should be
 608  * usable for further buffers when doing ARB_draw_buffer support.
 609  */
 610 static void
 611 brw_update_renderbuffer_surface(struct brw_context *brw,
 612                                 struct gl_renderbuffer *rb,
 613                                 bool layered,
 614                                 unsigned int unit)
 615 {
 616    struct gl_context *ctx = &brw->ctx;
 617    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 618    struct intel_mipmap_tree *mt = irb->mt;
 619    uint32_t *surf;
 620    uint32_t tile_x, tile_y;
 621    uint32_t format = 0;
 622    /* _NEW_BUFFERS */
 623    mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 624    uint32_t surf_index =
 625       brw->wm.prog_data->binding_table.render_target_start + unit;
 626
 627    assert(!layered);
 628
 629    if (rb->TexImage && !brw->has_surface_tile_offset) {
 630       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 631
 632       if (tile_x != 0 || tile_y != 0) {
 633          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 634           * destination in a miptree unless you actually setup your renderbuffer
 635           * as a miptree and used the fragile lod/array_index/etc. controls to
 636           * select the image.  So, instead, we just make a new single-level
 637           * miptree and render into that.
 638           */
 639          intel_renderbuffer_move_to_temp(brw, irb, false);
 640          mt = irb->mt;
 641       }
 642    }
 643
 644    intel_miptree_used_for_rendering(irb->mt);
 645
 646    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 647                           &brw->wm.base.surf_offset[surf_index]);
 648
 649    format = brw->render_target_format[rb_format];
 650    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 651       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 652                     __FUNCTION__, _mesa_get_format_name(rb_format));
 653    }
 654
 655    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 656               format << BRW_SURFACE_FORMAT_SHIFT);
 657
 658    /* reloc */
 659    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 660               mt->bo->offset64);
 661
 662    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 663               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 664
 665    surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
 666               (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 667
 668    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 669
 670    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 671    /* Note that the low bits of these fields are missing, so
 672     * there's the possibility of getting in trouble.
 673     */
 674    assert(tile_x % 4 == 0);
 675    assert(tile_y % 2 == 0);
 676    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 677               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 678               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 679
 680    if (brw->gen < 6) {
 681       /* _NEW_COLOR */
 682       if (!ctx->Color.ColorLogicOpEnabled &&
 683           (ctx->Color.BlendEnabled & (1 << unit)))
 684          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 685
 686       if (!ctx->Color.ColorMask[unit][0])
 687          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 688       if (!ctx->Color.ColorMask[unit][1])
 689          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 690       if (!ctx->Color.ColorMask[unit][2])
 691          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 692
 693       /* As mentioned above, disable writes to the alpha component when the
 694        * renderbuffer is XRGB.
 695        */
 696       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 697           !ctx->Color.ColorMask[unit][3]) {
 698          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 699       }
 700    }
 701
 702    drm_intel_bo_emit_reloc(brw->batch.bo,
 703                            brw->wm.base.surf_offset[surf_index] + 4,
 704                            mt->bo,
 705                            surf[1] - mt->bo->offset64,
 706                            I915_GEM_DOMAIN_RENDER,
 707                            I915_GEM_DOMAIN_RENDER);
 708 }
 709
 710 /**
 711  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 712  */
 713 static void
 714 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 715 {
 716    struct gl_context *ctx = &brw->ctx;
 717    GLuint i;
 718
 719    /* _NEW_BUFFERS | _NEW_COLOR */
 720    /* Update surfaces for drawing buffers */
 721    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 722       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 723          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 724             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 725                                                   ctx->DrawBuffer->MaxNumLayers > 0, i);
 726          } else {
 727             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 728          }
 729       }
 730    } else {
 731       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 732    }
 733    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 734 }
 735
 736 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 737    .dirty = {
 738       .mesa = _NEW_BUFFERS |
 739               _NEW_COLOR,
 740       .brw = BRW_NEW_BATCH,
 741    },
 742    .emit = brw_update_renderbuffer_surfaces,
 743 };
 744
 745 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 746    .dirty = {
 747       .mesa = _NEW_BUFFERS,
 748       .brw = BRW_NEW_BATCH,
 749    },
 750    .emit = brw_update_renderbuffer_surfaces,
 751 };
 752
 753
 754 static void
 755 update_stage_texture_surfaces(struct brw_context *brw,
 756                               const struct gl_program *prog,
 757                               struct brw_stage_state *stage_state,
 758                               bool for_gather)
 759 {
 760    if (!prog)
 761       return;
 762
 763    struct gl_context *ctx = &brw->ctx;
 764
 765    uint32_t *surf_offset = stage_state->surf_offset;
 766    if (for_gather)
 767       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
 768    else
 769       surf_offset += stage_state->prog_data->binding_table.texture_start;
 770
 771    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 772    for (unsigned s = 0; s < num_samplers; s++) {
 773       surf_offset[s] = 0;
 774
 775       if (prog->SamplersUsed & (1 << s)) {
 776          const unsigned unit = prog->SamplerUnits[s];
 777
 778          /* _NEW_TEXTURE */
 779          if (ctx->Texture.Unit[unit]._Current) {
 780             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 781          }
 782       }
 783    }
 784 }
 785
 786
 787 /**
 788  * Construct SURFACE_STATE objects for enabled textures.
 789  */
 790 static void
 791 brw_update_texture_surfaces(struct brw_context *brw)
 792 {
 793    /* BRW_NEW_VERTEX_PROGRAM */
 794    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 795
 796    /* BRW_NEW_GEOMETRY_PROGRAM */
 797    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 798
 799    /* BRW_NEW_FRAGMENT_PROGRAM */
 800    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 801
 802    /* _NEW_TEXTURE */
 803    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
 804    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
 805    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
 806
 807    /* emit alternate set of surface state for gather. this
 808     * allows the surface format to be overriden for only the
 809     * gather4 messages. */
 810    if (brw->gen < 8) {
 811       if (vs && vs->UsesGather)
 812          update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
 813       if (gs && gs->UsesGather)
 814          update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
 815       if (fs && fs->UsesGather)
 816          update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
 817    }
 818
 819    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 820 }
 821
 822 const struct brw_tracked_state brw_texture_surfaces = {
 823    .dirty = {
 824       .mesa = _NEW_TEXTURE,
 825       .brw = BRW_NEW_BATCH |
 826              BRW_NEW_FRAGMENT_PROGRAM |
 827              BRW_NEW_GEOMETRY_PROGRAM |
 828              BRW_NEW_TEXTURE_BUFFER |
 829              BRW_NEW_VERTEX_PROGRAM,
 830    },
 831    .emit = brw_update_texture_surfaces,
 832 };
 833
 834 void
 835 brw_upload_ubo_surfaces(struct brw_context *brw,
 836                         struct gl_shader *shader,
 837                         struct brw_stage_state *stage_state,
 838                         struct brw_stage_prog_data *prog_data,
 839                         bool dword_pitch)
 840 {
 841    struct gl_context *ctx = &brw->ctx;
 842
 843    if (!shader)
 844       return;
 845
 846    uint32_t *surf_offsets =
 847       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 848
 849    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 850       struct gl_uniform_buffer_binding *binding;
 851       struct intel_buffer_object *intel_bo;
 852
 853       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 854       intel_bo = intel_buffer_object(binding->BufferObject);
 855       drm_intel_bo *bo =
 856          intel_bufferobj_buffer(brw, intel_bo,
 857                                 binding->Offset,
 858                                 binding->BufferObject->Size - binding->Offset);
 859
 860       /* Because behavior for referencing outside of the binding's size in the
 861        * glBindBufferRange case is undefined, we can just bind the whole buffer
 862        * glBindBufferBase wants and be a correct implementation.
 863        */
 864       brw_create_constant_surface(brw, bo, binding->Offset,
 865                                   bo->size - binding->Offset,
 866                                   &surf_offsets[i],
 867                                   dword_pitch);
 868    }
 869
 870    if (shader->NumUniformBlocks)
 871       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 872 }
 873
 874 static void
 875 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 876 {
 877    struct gl_context *ctx = &brw->ctx;
 878    /* _NEW_PROGRAM */
 879    struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
 880
 881    if (!prog)
 882       return;
 883
 884    /* BRW_NEW_FS_PROG_DATA */
 885    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 886                            &brw->wm.base, &brw->wm.prog_data->base, true);
 887 }
 888
 889 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 890    .dirty = {
 891       .mesa = _NEW_PROGRAM,
 892       .brw = BRW_NEW_BATCH |
 893              BRW_NEW_FS_PROG_DATA |
 894              BRW_NEW_UNIFORM_BUFFER,
 895    },
 896    .emit = brw_upload_wm_ubo_surfaces,
 897 };
 898
 899 void
 900 brw_upload_abo_surfaces(struct brw_context *brw,
 901                         struct gl_shader_program *prog,
 902                         struct brw_stage_state *stage_state,
 903                         struct brw_stage_prog_data *prog_data)
 904 {
 905    struct gl_context *ctx = &brw->ctx;
 906    uint32_t *surf_offsets =
 907       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 908
 909    for (int i = 0; i < prog->NumAtomicBuffers; i++) {
 910       struct gl_atomic_buffer_binding *binding =
 911          &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
 912       struct intel_buffer_object *intel_bo =
 913          intel_buffer_object(binding->BufferObject);
 914       drm_intel_bo *bo = intel_bufferobj_buffer(
 915          brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
 916
 917       brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
 918                                    bo->size - binding->Offset,
 919                                    &surf_offsets[i], true);
 920    }
 921
 922    if (prog->NumAtomicBuffers)
 923       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 924 }
 925
 926 static void
 927 brw_upload_wm_abo_surfaces(struct brw_context *brw)
 928 {
 929    struct gl_context *ctx = &brw->ctx;
 930    /* _NEW_PROGRAM */
 931    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 932
 933    if (prog) {
 934       /* BRW_NEW_FS_PROG_DATA */
 935       brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
 936                               &brw->wm.prog_data->base);
 937    }
 938 }
 939
 940 const struct brw_tracked_state brw_wm_abo_surfaces = {
 941    .dirty = {
 942       .mesa = _NEW_PROGRAM,
 943       .brw = BRW_NEW_ATOMIC_BUFFER |
 944              BRW_NEW_BATCH |
 945              BRW_NEW_FS_PROG_DATA,
 946    },
 947    .emit = brw_upload_wm_abo_surfaces,
 948 };
 949
 950 void
 951 gen4_init_vtable_surface_functions(struct brw_context *brw)
 952 {
 953    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 954    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 955    brw->vtbl.update_null_renderbuffer_surface =
 956       brw_update_null_renderbuffer_surface;
 957    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 958 }