src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193 static void
 194 gen4_emit_buffer_surface_state(struct brw_context *brw,
 195                                uint32_t *out_offset,
 196                                drm_intel_bo *bo,
 197                                unsigned buffer_offset,
 198                                unsigned surface_format,
 199                                unsigned buffer_size,
 200                                unsigned pitch,
 201                                unsigned mocs,
 202                                bool rw)
 203 {
 204    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 205                                     6 * 4, 32, out_offset);
 206    memset(surf, 0, 6 * 4);
 207
 208    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 209              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 210              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 211    surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
 212    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 213              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 214    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 215              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 216
 217    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 218     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 219     * physical cache.  It is mapped in hardware to the sampler cache."
 220     */
 221    if (bo) {
 222       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 223                               bo, buffer_offset,
 224                               I915_GEM_DOMAIN_SAMPLER,
 225                               (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
 226    }
 227 }
 228
 229 void
 230 brw_update_buffer_texture_surface(struct gl_context *ctx,
 231                                   unsigned unit,
 232                                   uint32_t *surf_offset)
 233 {
 234    struct brw_context *brw = brw_context(ctx);
 235    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 236    struct intel_buffer_object *intel_obj =
 237       intel_buffer_object(tObj->BufferObject);
 238    uint32_t size = tObj->BufferSize;
 239    drm_intel_bo *bo = NULL;
 240    mesa_format format = tObj->_BufferObjectFormat;
 241    uint32_t brw_format = brw_format_for_mesa_format(format);
 242    int texel_size = _mesa_get_format_bytes(format);
 243
 244    if (intel_obj) {
 245       size = MIN2(size, intel_obj->Base.Size);
 246       bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
 247    }
 248
 249    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 250       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 251                     _mesa_get_format_name(format));
 252    }
 253
 254    brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
 255                                        tObj->BufferOffset,
 256                                        brw_format,
 257                                        size / texel_size,
 258                                        texel_size,
 259                                        0, /* mocs */
 260                                        false /* rw */);
 261 }
 262
 263 static void
 264 brw_update_texture_surface(struct gl_context *ctx,
 265                            unsigned unit,
 266                            uint32_t *surf_offset,
 267                            bool for_gather)
 268 {
 269    struct brw_context *brw = brw_context(ctx);
 270    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 271    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 272    struct intel_mipmap_tree *mt = intelObj->mt;
 273    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 274    uint32_t *surf;
 275
 276    /* BRW_NEW_UNIFORM_BUFFER */
 277    if (tObj->Target == GL_TEXTURE_BUFFER) {
 278       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 279       return;
 280    }
 281
 282    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 283                           6 * 4, 32, surf_offset);
 284
 285    uint32_t tex_format = translate_tex_format(brw, mt->format,
 286                                               sampler->sRGBDecode);
 287
 288    if (for_gather) {
 289       /* Sandybridge's gather4 message is broken for integer formats.
 290        * To work around this, we pretend the surface is UNORM for
 291        * 8 or 16-bit formats, and emit shader instructions to recover
 292        * the real INT/UINT value.  For 32-bit formats, we pretend
 293        * the surface is FLOAT, and simply reinterpret the resulting
 294        * bits.
 295        */
 296       switch (tex_format) {
 297       case BRW_SURFACEFORMAT_R8_SINT:
 298       case BRW_SURFACEFORMAT_R8_UINT:
 299          tex_format = BRW_SURFACEFORMAT_R8_UNORM;
 300          break;
 301
 302       case BRW_SURFACEFORMAT_R16_SINT:
 303       case BRW_SURFACEFORMAT_R16_UINT:
 304          tex_format = BRW_SURFACEFORMAT_R16_UNORM;
 305          break;
 306
 307       case BRW_SURFACEFORMAT_R32_SINT:
 308       case BRW_SURFACEFORMAT_R32_UINT:
 309          tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
 310          break;
 311
 312       default:
 313          break;
 314       }
 315    }
 316
 317    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 318               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 319               BRW_SURFACE_CUBEFACE_ENABLES |
 320               tex_format << BRW_SURFACE_FORMAT_SHIFT);
 321
 322    surf[1] = intelObj->mt->region->bo->offset64 + intelObj->mt->offset; /* reloc */
 323
 324    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 325               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 326               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 327
 328    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 329               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 330               (intelObj->mt->region->pitch - 1) <<
 331               BRW_SURFACE_PITCH_SHIFT);
 332
 333    surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
 334               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 335
 336    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 337
 338    /* Emit relocation to surface contents */
 339    drm_intel_bo_emit_reloc(brw->batch.bo,
 340                            *surf_offset + 4,
 341                            intelObj->mt->region->bo,
 342                            surf[1] - intelObj->mt->region->bo->offset64,
 343                            I915_GEM_DOMAIN_SAMPLER, 0);
 344 }
 345
 346 /**
 347  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 348  * read from this buffer with Data Port Read instructions/messages.
 349  */
 350 void
 351 brw_create_constant_surface(struct brw_context *brw,
 352                             drm_intel_bo *bo,
 353                             uint32_t offset,
 354                             uint32_t size,
 355                             uint32_t *out_offset,
 356                             bool dword_pitch)
 357 {
 358    uint32_t stride = dword_pitch ? 4 : 16;
 359    uint32_t elements = ALIGN(size, stride) / stride;
 360
 361    brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
 362                                        BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 363                                        elements, stride, 0, false);
 364 }
 365
 366 /**
 367  * Set up a binding table entry for use by stream output logic (transform
 368  * feedback).
 369  *
 370  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 371  */
 372 void
 373 brw_update_sol_surface(struct brw_context *brw,
 374                        struct gl_buffer_object *buffer_obj,
 375                        uint32_t *out_offset, unsigned num_vector_components,
 376                        unsigned stride_dwords, unsigned offset_dwords)
 377 {
 378    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 379    uint32_t offset_bytes = 4 * offset_dwords;
 380    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
 381                                              offset_bytes,
 382                                              buffer_obj->Size - offset_bytes);
 383    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 384                                     out_offset);
 385    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 386    size_t size_dwords = buffer_obj->Size / 4;
 387    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 388
 389    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 390     * too big to map using a single binding table entry?
 391     */
 392    assert((size_dwords - offset_dwords) / stride_dwords
 393           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 394
 395    if (size_dwords > offset_dwords + num_vector_components) {
 396       /* There is room for at least 1 transform feedback output in the buffer.
 397        * Compute the number of additional transform feedback outputs the
 398        * buffer has room for.
 399        */
 400       buffer_size_minus_1 =
 401          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 402    } else {
 403       /* There isn't even room for a single transform feedback output in the
 404        * buffer.  We can't configure the binding table entry to prevent output
 405        * entirely; we'll have to rely on the geometry shader to detect
 406        * overflow.  But to minimize the damage in case of a bug, set up the
 407        * binding table entry to just allow a single output.
 408        */
 409       buffer_size_minus_1 = 0;
 410    }
 411    width = buffer_size_minus_1 & 0x7f;
 412    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 413    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 414
 415    switch (num_vector_components) {
 416    case 1:
 417       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 418       break;
 419    case 2:
 420       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 421       break;
 422    case 3:
 423       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 424       break;
 425    case 4:
 426       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 427       break;
 428    default:
 429       assert(!"Invalid vector size for transform feedback output");
 430       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 431       break;
 432    }
 433
 434    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 435       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 436       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 437       BRW_SURFACE_RC_READ_WRITE;
 438    surf[1] = bo->offset64 + offset_bytes; /* reloc */
 439    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 440               height << BRW_SURFACE_HEIGHT_SHIFT);
 441    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 442               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 443    surf[4] = 0;
 444    surf[5] = 0;
 445
 446    /* Emit relocation to surface contents. */
 447    drm_intel_bo_emit_reloc(brw->batch.bo,
 448                            *out_offset + 4,
 449                            bo, offset_bytes,
 450                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 451 }
 452
 453 /* Creates a new WM constant buffer reflecting the current fragment program's
 454  * constants, if needed by the fragment program.
 455  *
 456  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 457  * state atom.
 458  */
 459 static void
 460 brw_upload_wm_pull_constants(struct brw_context *brw)
 461 {
 462    struct gl_context *ctx = &brw->ctx;
 463    /* BRW_NEW_FRAGMENT_PROGRAM */
 464    struct brw_fragment_program *fp =
 465       (struct brw_fragment_program *) brw->fragment_program;
 466    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 467    const int size = brw->wm.prog_data->base.nr_pull_params * sizeof(float);
 468    const int surf_index =
 469       brw->wm.prog_data->base.binding_table.pull_constants_start;
 470    float *constants;
 471    unsigned int i;
 472
 473    _mesa_load_state_parameters(ctx, params);
 474
 475    /* CACHE_NEW_WM_PROG */
 476    if (brw->wm.prog_data->base.nr_pull_params == 0) {
 477       if (brw->wm.base.const_bo) {
 478          drm_intel_bo_unreference(brw->wm.base.const_bo);
 479          brw->wm.base.const_bo = NULL;
 480          brw->wm.base.surf_offset[surf_index] = 0;
 481          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 482       }
 483       return;
 484    }
 485
 486    drm_intel_bo_unreference(brw->wm.base.const_bo);
 487    brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 488                                          size, 64);
 489
 490    /* _NEW_PROGRAM_CONSTANTS */
 491    drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
 492    constants = brw->wm.base.const_bo->virtual;
 493    for (i = 0; i < brw->wm.prog_data->base.nr_pull_params; i++) {
 494       constants[i] = *brw->wm.prog_data->base.pull_param[i];
 495    }
 496    drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
 497
 498    brw_create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
 499                                &brw->wm.base.surf_offset[surf_index],
 500                                true);
 501
 502    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 503 }
 504
 505 const struct brw_tracked_state brw_wm_pull_constants = {
 506    .dirty = {
 507       .mesa = (_NEW_PROGRAM_CONSTANTS),
 508       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 509       .cache = CACHE_NEW_WM_PROG,
 510    },
 511    .emit = brw_upload_wm_pull_constants,
 512 };
 513
 514 static void
 515 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 516 {
 517    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 518     * Notes):
 519     *
 520     *     A null surface will be used in instances where an actual surface is
 521     *     not bound. When a write message is generated to a null surface, no
 522     *     actual surface is written to. When a read message (including any
 523     *     sampling engine message) is generated to a null surface, the result
 524     *     is all zeros. Note that a null surface type is allowed to be used
 525     *     with all messages, even if it is not specificially indicated as
 526     *     supported. All of the remaining fields in surface state are ignored
 527     *     for null surfaces, with the following exceptions:
 528     *
 529     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 530     *       depth buffer’s corresponding state for all render target surfaces,
 531     *       including null.
 532     *
 533     *     - Surface Format must be R8G8B8A8_UNORM.
 534     */
 535    struct gl_context *ctx = &brw->ctx;
 536    uint32_t *surf;
 537    unsigned surface_type = BRW_SURFACE_NULL;
 538    drm_intel_bo *bo = NULL;
 539    unsigned pitch_minus_1 = 0;
 540    uint32_t multisampling_state = 0;
 541    uint32_t surf_index =
 542       brw->wm.prog_data->binding_table.render_target_start + unit;
 543
 544    /* _NEW_BUFFERS */
 545    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 546
 547    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 548                           &brw->wm.base.surf_offset[surf_index]);
 549
 550    if (fb->Visual.samples > 1) {
 551       /* On Gen6, null render targets seem to cause GPU hangs when
 552        * multisampling.  So work around this problem by rendering into dummy
 553        * color buffer.
 554        *
 555        * To decrease the amount of memory needed by the workaround buffer, we
 556        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 557        * the amount of memory needed for the workaround buffer is
 558        * (width_in_tiles + height_in_tiles - 1) tiles.
 559        *
 560        * Note that since the workaround buffer will be interpreted by the
 561        * hardware as an interleaved multisampled buffer, we need to compute
 562        * width_in_tiles and height_in_tiles by dividing the width and height
 563        * by 16 rather than the normal Y-tile size of 32.
 564        */
 565       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 566       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 567       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 568       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 569                          size_needed);
 570       bo = brw->wm.multisampled_null_render_target_bo;
 571       surface_type = BRW_SURFACE_2D;
 572       pitch_minus_1 = 127;
 573       multisampling_state =
 574          brw_get_surface_num_multisamples(fb->Visual.samples);
 575    }
 576
 577    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 578               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 579    if (brw->gen < 6) {
 580       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 581                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 582                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 583                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 584    }
 585    surf[1] = bo ? bo->offset64 : 0;
 586    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 587               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 588
 589    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 590     * Notes):
 591     *
 592     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 593     */
 594    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 595               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 596    surf[4] = multisampling_state;
 597    surf[5] = 0;
 598
 599    if (bo) {
 600       drm_intel_bo_emit_reloc(brw->batch.bo,
 601                               brw->wm.base.surf_offset[surf_index] + 4,
 602                               bo, 0,
 603                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 604    }
 605 }
 606
 607 /**
 608  * Sets up a surface state structure to point at the given region.
 609  * While it is only used for the front/back buffer currently, it should be
 610  * usable for further buffers when doing ARB_draw_buffer support.
 611  */
 612 static void
 613 brw_update_renderbuffer_surface(struct brw_context *brw,
 614                                 struct gl_renderbuffer *rb,
 615                                 bool layered,
 616                                 unsigned int unit)
 617 {
 618    struct gl_context *ctx = &brw->ctx;
 619    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 620    struct intel_mipmap_tree *mt = irb->mt;
 621    struct intel_region *region;
 622    uint32_t *surf;
 623    uint32_t tile_x, tile_y;
 624    uint32_t format = 0;
 625    /* _NEW_BUFFERS */
 626    mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 627    uint32_t surf_index =
 628       brw->wm.prog_data->binding_table.render_target_start + unit;
 629
 630    assert(!layered);
 631
 632    if (rb->TexImage && !brw->has_surface_tile_offset) {
 633       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 634
 635       if (tile_x != 0 || tile_y != 0) {
 636          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 637           * destination in a miptree unless you actually setup your renderbuffer
 638           * as a miptree and used the fragile lod/array_index/etc. controls to
 639           * select the image.  So, instead, we just make a new single-level
 640           * miptree and render into that.
 641           */
 642          intel_renderbuffer_move_to_temp(brw, irb, false);
 643          mt = irb->mt;
 644       }
 645    }
 646
 647    intel_miptree_used_for_rendering(irb->mt);
 648
 649    region = irb->mt->region;
 650
 651    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 652                           &brw->wm.base.surf_offset[surf_index]);
 653
 654    format = brw->render_target_format[rb_format];
 655    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 656       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 657                     __FUNCTION__, _mesa_get_format_name(rb_format));
 658    }
 659
 660    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 661               format << BRW_SURFACE_FORMAT_SHIFT);
 662
 663    /* reloc */
 664    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 665               region->bo->offset64);
 666
 667    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 668               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 669
 670    surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
 671               (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 672
 673    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 674
 675    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 676    /* Note that the low bits of these fields are missing, so
 677     * there's the possibility of getting in trouble.
 678     */
 679    assert(tile_x % 4 == 0);
 680    assert(tile_y % 2 == 0);
 681    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 682               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 683               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 684
 685    if (brw->gen < 6) {
 686       /* _NEW_COLOR */
 687       if (!ctx->Color.ColorLogicOpEnabled &&
 688           (ctx->Color.BlendEnabled & (1 << unit)))
 689          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 690
 691       if (!ctx->Color.ColorMask[unit][0])
 692          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 693       if (!ctx->Color.ColorMask[unit][1])
 694          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 695       if (!ctx->Color.ColorMask[unit][2])
 696          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 697
 698       /* As mentioned above, disable writes to the alpha component when the
 699        * renderbuffer is XRGB.
 700        */
 701       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 702           !ctx->Color.ColorMask[unit][3]) {
 703          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 704       }
 705    }
 706
 707    drm_intel_bo_emit_reloc(brw->batch.bo,
 708                            brw->wm.base.surf_offset[surf_index] + 4,
 709                            region->bo,
 710                            surf[1] - region->bo->offset64,
 711                            I915_GEM_DOMAIN_RENDER,
 712                            I915_GEM_DOMAIN_RENDER);
 713 }
 714
 715 /**
 716  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 717  */
 718 static void
 719 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 720 {
 721    struct gl_context *ctx = &brw->ctx;
 722    GLuint i;
 723
 724    /* _NEW_BUFFERS | _NEW_COLOR */
 725    /* Update surfaces for drawing buffers */
 726    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 727       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 728          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 729             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 730                                                   ctx->DrawBuffer->MaxNumLayers > 0, i);
 731          } else {
 732             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 733          }
 734       }
 735    } else {
 736       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 737    }
 738    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 739 }
 740
 741 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 742    .dirty = {
 743       .mesa = (_NEW_COLOR |
 744                _NEW_BUFFERS),
 745       .brw = BRW_NEW_BATCH,
 746       .cache = 0
 747    },
 748    .emit = brw_update_renderbuffer_surfaces,
 749 };
 750
 751 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 752    .dirty = {
 753       .mesa = _NEW_BUFFERS,
 754       .brw = BRW_NEW_BATCH,
 755       .cache = 0
 756    },
 757    .emit = brw_update_renderbuffer_surfaces,
 758 };
 759
 760
 761 static void
 762 update_stage_texture_surfaces(struct brw_context *brw,
 763                               const struct gl_program *prog,
 764                               struct brw_stage_state *stage_state,
 765                               bool for_gather)
 766 {
 767    if (!prog)
 768       return;
 769
 770    struct gl_context *ctx = &brw->ctx;
 771
 772    uint32_t *surf_offset = stage_state->surf_offset;
 773    if (for_gather)
 774       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
 775    else
 776       surf_offset += stage_state->prog_data->binding_table.texture_start;
 777
 778    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 779    for (unsigned s = 0; s < num_samplers; s++) {
 780       surf_offset[s] = 0;
 781
 782       if (prog->SamplersUsed & (1 << s)) {
 783          const unsigned unit = prog->SamplerUnits[s];
 784
 785          /* _NEW_TEXTURE */
 786          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 787             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 788          }
 789       }
 790    }
 791 }
 792
 793
 794 /**
 795  * Construct SURFACE_STATE objects for enabled textures.
 796  */
 797 static void
 798 brw_update_texture_surfaces(struct brw_context *brw)
 799 {
 800    /* BRW_NEW_VERTEX_PROGRAM */
 801    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 802
 803    /* BRW_NEW_GEOMETRY_PROGRAM */
 804    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 805
 806    /* BRW_NEW_FRAGMENT_PROGRAM */
 807    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 808
 809    /* _NEW_TEXTURE */
 810    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
 811    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
 812    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
 813
 814    /* emit alternate set of surface state for gather. this
 815     * allows the surface format to be overriden for only the
 816     * gather4 messages. */
 817    if (vs && vs->UsesGather)
 818       update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
 819    if (gs && gs->UsesGather)
 820       update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
 821    if (fs && fs->UsesGather)
 822       update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
 823
 824    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 825 }
 826
 827 const struct brw_tracked_state brw_texture_surfaces = {
 828    .dirty = {
 829       .mesa = _NEW_TEXTURE,
 830       .brw = BRW_NEW_BATCH |
 831              BRW_NEW_UNIFORM_BUFFER |
 832              BRW_NEW_VERTEX_PROGRAM |
 833              BRW_NEW_GEOMETRY_PROGRAM |
 834              BRW_NEW_FRAGMENT_PROGRAM,
 835       .cache = 0
 836    },
 837    .emit = brw_update_texture_surfaces,
 838 };
 839
 840 void
 841 brw_upload_ubo_surfaces(struct brw_context *brw,
 842                         struct gl_shader *shader,
 843                         struct brw_stage_state *stage_state,
 844                         struct brw_stage_prog_data *prog_data)
 845 {
 846    struct gl_context *ctx = &brw->ctx;
 847
 848    if (!shader)
 849       return;
 850
 851    uint32_t *surf_offsets =
 852       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 853
 854    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 855       struct gl_uniform_buffer_binding *binding;
 856       struct intel_buffer_object *intel_bo;
 857
 858       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 859       intel_bo = intel_buffer_object(binding->BufferObject);
 860       drm_intel_bo *bo =
 861          intel_bufferobj_buffer(brw, intel_bo,
 862                                 binding->Offset,
 863                                 binding->BufferObject->Size - binding->Offset);
 864
 865       /* Because behavior for referencing outside of the binding's size in the
 866        * glBindBufferRange case is undefined, we can just bind the whole buffer
 867        * glBindBufferBase wants and be a correct implementation.
 868        */
 869       brw_create_constant_surface(brw, bo, binding->Offset,
 870                                   bo->size - binding->Offset,
 871                                   &surf_offsets[i],
 872                                   shader->Stage == MESA_SHADER_FRAGMENT);
 873    }
 874
 875    if (shader->NumUniformBlocks)
 876       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 877 }
 878
 879 static void
 880 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 881 {
 882    struct gl_context *ctx = &brw->ctx;
 883    /* _NEW_PROGRAM */
 884    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 885
 886    if (!prog)
 887       return;
 888
 889    /* CACHE_NEW_WM_PROG */
 890    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 891                            &brw->wm.base, &brw->wm.prog_data->base);
 892 }
 893
 894 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 895    .dirty = {
 896       .mesa = _NEW_PROGRAM,
 897       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 898       .cache = CACHE_NEW_WM_PROG,
 899    },
 900    .emit = brw_upload_wm_ubo_surfaces,
 901 };
 902
 903 void
 904 brw_upload_abo_surfaces(struct brw_context *brw,
 905                         struct gl_shader_program *prog,
 906                         struct brw_stage_state *stage_state,
 907                         struct brw_stage_prog_data *prog_data)
 908 {
 909    struct gl_context *ctx = &brw->ctx;
 910    uint32_t *surf_offsets =
 911       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 912
 913    for (int i = 0; i < prog->NumAtomicBuffers; i++) {
 914       struct gl_atomic_buffer_binding *binding =
 915          &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
 916       struct intel_buffer_object *intel_bo =
 917          intel_buffer_object(binding->BufferObject);
 918       drm_intel_bo *bo = intel_bufferobj_buffer(
 919          brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
 920
 921       brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
 922                                    bo->size - binding->Offset,
 923                                    &surf_offsets[i], true);
 924    }
 925
 926    if (prog->NumUniformBlocks)
 927       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 928 }
 929
 930 static void
 931 brw_upload_wm_abo_surfaces(struct brw_context *brw)
 932 {
 933    struct gl_context *ctx = &brw->ctx;
 934    /* _NEW_PROGRAM */
 935    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 936
 937    if (prog) {
 938       /* CACHE_NEW_WM_PROG */
 939       brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
 940                               &brw->wm.prog_data->base);
 941    }
 942 }
 943
 944 const struct brw_tracked_state brw_wm_abo_surfaces = {
 945    .dirty = {
 946       .mesa = _NEW_PROGRAM,
 947       .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
 948       .cache = CACHE_NEW_WM_PROG,
 949    },
 950    .emit = brw_upload_wm_abo_surfaces,
 951 };
 952
 953 void
 954 gen4_init_vtable_surface_functions(struct brw_context *brw)
 955 {
 956    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 957    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 958    brw->vtbl.update_null_renderbuffer_surface =
 959       brw_update_null_renderbuffer_surface;
 960    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 961 }