src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193
 194 static void
 195 brw_update_buffer_texture_surface(struct gl_context *ctx,
 196                                   unsigned unit,
 197                                   uint32_t *binding_table,
 198                                   unsigned surf_index)
 199 {
 200    struct brw_context *brw = brw_context(ctx);
 201    struct intel_context *intel = &brw->intel;
 202    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 203    uint32_t *surf;
 204    struct intel_buffer_object *intel_obj =
 205       intel_buffer_object(tObj->BufferObject);
 206    drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
 207    gl_format format = tObj->_BufferObjectFormat;
 208    uint32_t brw_format = brw_format_for_mesa_format(format);
 209    int texel_size = _mesa_get_format_bytes(format);
 210
 211    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 212       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 213                     _mesa_get_format_name(format));
 214    }
 215
 216    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 217                           6 * 4, 32, &binding_table[surf_index]);
 218
 219    surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 220               (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));
 221
 222    if (intel->gen >= 6)
 223       surf[0] |= BRW_SURFACE_RC_READ_WRITE;
 224
 225    if (bo) {
 226       surf[1] = bo->offset; /* reloc */
 227
 228       /* Emit relocation to surface contents. */
 229       drm_intel_bo_emit_reloc(brw->batch.bo,
 230                               binding_table[surf_index] + 4,
 231                               bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
 232
 233       int w = intel_obj->Base.Size / texel_size;
 234       surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 235                  ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
 236       surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 237                  (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
 238    } else {
 239       surf[1] = 0;
 240       surf[2] = 0;
 241       surf[3] = 0;
 242    }
 243
 244    surf[4] = 0;
 245    surf[5] = 0;
 246 }
 247
 248 static void
 249 brw_update_texture_surface(struct gl_context *ctx,
 250                            unsigned unit,
 251                            uint32_t *binding_table,
 252                            unsigned surf_index)
 253 {
 254    struct brw_context *brw = brw_context(ctx);
 255    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 256    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 257    struct intel_mipmap_tree *mt = intelObj->mt;
 258    struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
 259    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 260    uint32_t *surf;
 261    uint32_t tile_x, tile_y;
 262
 263    if (tObj->Target == GL_TEXTURE_BUFFER) {
 264       brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
 265       return;
 266    }
 267
 268    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 269                           6 * 4, 32, &binding_table[surf_index]);
 270
 271    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 272               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 273               BRW_SURFACE_CUBEFACE_ENABLES |
 274               (translate_tex_format(brw,
 275                                     mt->format,
 276                                     tObj->DepthMode,
 277                                     sampler->sRGBDecode) <<
 278                BRW_SURFACE_FORMAT_SHIFT));
 279
 280    surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
 281    surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
 282                                              &tile_x, &tile_y);
 283
 284    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 285               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 286               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 287
 288    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 289               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 290               (intelObj->mt->region->pitch - 1) <<
 291               BRW_SURFACE_PITCH_SHIFT);
 292
 293    surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
 294
 295    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 296    /* Note that the low bits of these fields are missing, so
 297     * there's the possibility of getting in trouble.
 298     */
 299    assert(tile_x % 4 == 0);
 300    assert(tile_y % 2 == 0);
 301    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 302               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 303               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 304
 305    /* Emit relocation to surface contents */
 306    drm_intel_bo_emit_reloc(brw->batch.bo,
 307                            binding_table[surf_index] + 4,
 308                            intelObj->mt->region->bo,
 309                            surf[1] - intelObj->mt->region->bo->offset,
 310                            I915_GEM_DOMAIN_SAMPLER, 0);
 311 }
 312
 313 /**
 314  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 315  * read from this buffer with Data Port Read instructions/messages.
 316  */
 317 static void
 318 brw_create_constant_surface(struct brw_context *brw,
 319                             drm_intel_bo *bo,
 320                             uint32_t offset,
 321                             uint32_t size,
 322                             uint32_t *out_offset,
 323                             bool dword_pitch)
 324 {
 325    struct intel_context *intel = &brw->intel;
 326    uint32_t stride = dword_pitch ? 4 : 16;
 327    uint32_t elements = ALIGN(size, stride) / stride;
 328    const GLint w = elements - 1;
 329    uint32_t *surf;
 330
 331    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 332                           6 * 4, 32, out_offset);
 333
 334    surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 335               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 336               BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
 337
 338    if (intel->gen >= 6)
 339       surf[0] |= BRW_SURFACE_RC_READ_WRITE;
 340
 341    surf[1] = bo->offset + offset; /* reloc */
 342
 343    surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 344               ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
 345
 346    surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 347               (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
 348
 349    surf[4] = 0;
 350    surf[5] = 0;
 351
 352    /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
 353     * bspec ("Data Cache") says that the data cache does not exist as
 354     * a separate cache and is just the sampler cache.
 355     */
 356    drm_intel_bo_emit_reloc(brw->batch.bo,
 357                            *out_offset + 4,
 358                            bo, offset,
 359                            I915_GEM_DOMAIN_SAMPLER, 0);
 360 }
 361
 362 /**
 363  * Set up a binding table entry for use by stream output logic (transform
 364  * feedback).
 365  *
 366  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 367  */
 368 void
 369 brw_update_sol_surface(struct brw_context *brw,
 370                        struct gl_buffer_object *buffer_obj,
 371                        uint32_t *out_offset, unsigned num_vector_components,
 372                        unsigned stride_dwords, unsigned offset_dwords)
 373 {
 374    struct intel_context *intel = &brw->intel;
 375    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 376    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
 377    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 378                                     out_offset);
 379    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 380    uint32_t offset_bytes = 4 * offset_dwords;
 381    size_t size_dwords = buffer_obj->Size / 4;
 382    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 383
 384    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 385     * too big to map using a single binding table entry?
 386     */
 387    assert((size_dwords - offset_dwords) / stride_dwords
 388           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 389
 390    if (size_dwords > offset_dwords + num_vector_components) {
 391       /* There is room for at least 1 transform feedback output in the buffer.
 392        * Compute the number of additional transform feedback outputs the
 393        * buffer has room for.
 394        */
 395       buffer_size_minus_1 =
 396          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 397    } else {
 398       /* There isn't even room for a single transform feedback output in the
 399        * buffer.  We can't configure the binding table entry to prevent output
 400        * entirely; we'll have to rely on the geometry shader to detect
 401        * overflow.  But to minimize the damage in case of a bug, set up the
 402        * binding table entry to just allow a single output.
 403        */
 404       buffer_size_minus_1 = 0;
 405    }
 406    width = buffer_size_minus_1 & 0x7f;
 407    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 408    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 409
 410    switch (num_vector_components) {
 411    case 1:
 412       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 413       break;
 414    case 2:
 415       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 416       break;
 417    case 3:
 418       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 419       break;
 420    case 4:
 421       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 422       break;
 423    default:
 424       assert(!"Invalid vector size for transform feedback output");
 425       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 426       break;
 427    }
 428
 429    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 430       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 431       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 432       BRW_SURFACE_RC_READ_WRITE;
 433    surf[1] = bo->offset + offset_bytes; /* reloc */
 434    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 435               height << BRW_SURFACE_HEIGHT_SHIFT);
 436    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 437               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 438    surf[4] = 0;
 439    surf[5] = 0;
 440
 441    /* Emit relocation to surface contents. */
 442    drm_intel_bo_emit_reloc(brw->batch.bo,
 443                            *out_offset + 4,
 444                            bo, offset_bytes,
 445                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 446 }
 447
 448 /* Creates a new WM constant buffer reflecting the current fragment program's
 449  * constants, if needed by the fragment program.
 450  *
 451  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 452  * state atom.
 453  */
 454 static void
 455 brw_upload_wm_pull_constants(struct brw_context *brw)
 456 {
 457    struct gl_context *ctx = &brw->intel.ctx;
 458    struct intel_context *intel = &brw->intel;
 459    /* BRW_NEW_FRAGMENT_PROGRAM */
 460    struct brw_fragment_program *fp =
 461       (struct brw_fragment_program *) brw->fragment_program;
 462    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 463    const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
 464    const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
 465    float *constants;
 466    unsigned int i;
 467
 468    _mesa_load_state_parameters(ctx, params);
 469
 470    /* CACHE_NEW_WM_PROG */
 471    if (brw->wm.prog_data->nr_pull_params == 0) {
 472       if (brw->wm.const_bo) {
 473          drm_intel_bo_unreference(brw->wm.const_bo);
 474          brw->wm.const_bo = NULL;
 475          brw->wm.surf_offset[surf_index] = 0;
 476          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 477       }
 478       return;
 479    }
 480
 481    drm_intel_bo_unreference(brw->wm.const_bo);
 482    brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 483                                          size, 64);
 484
 485    /* _NEW_PROGRAM_CONSTANTS */
 486    drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
 487    constants = brw->wm.const_bo->virtual;
 488    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
 489       constants[i] = *brw->wm.prog_data->pull_param[i];
 490    }
 491    drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
 492
 493    brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
 494                                      &brw->wm.surf_offset[surf_index],
 495                                      true);
 496
 497    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 498 }
 499
 500 const struct brw_tracked_state brw_wm_pull_constants = {
 501    .dirty = {
 502       .mesa = (_NEW_PROGRAM_CONSTANTS),
 503       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 504       .cache = CACHE_NEW_WM_PROG,
 505    },
 506    .emit = brw_upload_wm_pull_constants,
 507 };
 508
 509 static void
 510 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 511 {
 512    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 513     * Notes):
 514     *
 515     *     A null surface will be used in instances where an actual surface is
 516     *     not bound. When a write message is generated to a null surface, no
 517     *     actual surface is written to. When a read message (including any
 518     *     sampling engine message) is generated to a null surface, the result
 519     *     is all zeros. Note that a null surface type is allowed to be used
 520     *     with all messages, even if it is not specificially indicated as
 521     *     supported. All of the remaining fields in surface state are ignored
 522     *     for null surfaces, with the following exceptions:
 523     *
 524     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 525     *       depth buffer’s corresponding state for all render target surfaces,
 526     *       including null.
 527     *
 528     *     - Surface Format must be R8G8B8A8_UNORM.
 529     */
 530    struct intel_context *intel = &brw->intel;
 531    struct gl_context *ctx = &intel->ctx;
 532    uint32_t *surf;
 533    unsigned surface_type = BRW_SURFACE_NULL;
 534    drm_intel_bo *bo = NULL;
 535    unsigned pitch_minus_1 = 0;
 536    uint32_t multisampling_state = 0;
 537
 538    /* _NEW_BUFFERS */
 539    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 540
 541    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 542                           6 * 4, 32, &brw->wm.surf_offset[unit]);
 543
 544    if (fb->Visual.samples > 1) {
 545       /* On Gen6, null render targets seem to cause GPU hangs when
 546        * multisampling.  So work around this problem by rendering into dummy
 547        * color buffer.
 548        *
 549        * To decrease the amount of memory needed by the workaround buffer, we
 550        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 551        * the amount of memory needed for the workaround buffer is
 552        * (width_in_tiles + height_in_tiles - 1) tiles.
 553        *
 554        * Note that since the workaround buffer will be interpreted by the
 555        * hardware as an interleaved multisampled buffer, we need to compute
 556        * width_in_tiles and height_in_tiles by dividing the width and height
 557        * by 16 rather than the normal Y-tile size of 32.
 558        */
 559       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 560       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 561       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 562       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 563                          size_needed);
 564       bo = brw->wm.multisampled_null_render_target_bo;
 565       surface_type = BRW_SURFACE_2D;
 566       pitch_minus_1 = 127;
 567       multisampling_state =
 568          brw_get_surface_num_multisamples(fb->Visual.samples);
 569    }
 570
 571    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 572               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 573    if (intel->gen < 6) {
 574       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 575                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 576                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 577                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 578    }
 579    surf[1] = bo ? bo->offset : 0;
 580    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 581               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 582
 583    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 584     * Notes):
 585     *
 586     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 587     */
 588    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 589               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 590    surf[4] = multisampling_state;
 591    surf[5] = 0;
 592
 593    if (bo) {
 594       drm_intel_bo_emit_reloc(brw->batch.bo,
 595                               brw->wm.surf_offset[unit] + 4,
 596                               bo, 0,
 597                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 598    }
 599 }
 600
 601 /**
 602  * Sets up a surface state structure to point at the given region.
 603  * While it is only used for the front/back buffer currently, it should be
 604  * usable for further buffers when doing ARB_draw_buffer support.
 605  */
 606 static void
 607 brw_update_renderbuffer_surface(struct brw_context *brw,
 608                                 struct gl_renderbuffer *rb,
 609                                 bool layered,
 610                                 unsigned int unit)
 611 {
 612    struct intel_context *intel = &brw->intel;
 613    struct gl_context *ctx = &intel->ctx;
 614    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 615    struct intel_mipmap_tree *mt = irb->mt;
 616    struct intel_region *region;
 617    uint32_t *surf;
 618    uint32_t tile_x, tile_y;
 619    uint32_t format = 0;
 620    /* _NEW_BUFFERS */
 621    gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 622
 623    assert(!layered);
 624
 625    if (rb->TexImage && !brw->has_surface_tile_offset) {
 626       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 627
 628       if (tile_x != 0 || tile_y != 0) {
 629          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 630           * destination in a miptree unless you actually setup your renderbuffer
 631           * as a miptree and used the fragile lod/array_index/etc. controls to
 632           * select the image.  So, instead, we just make a new single-level
 633           * miptree and render into that.
 634           */
 635          intel_renderbuffer_move_to_temp(brw, irb, false);
 636          mt = irb->mt;
 637       }
 638    }
 639
 640    intel_miptree_used_for_rendering(irb->mt);
 641
 642    region = irb->mt->region;
 643
 644    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 645                           6 * 4, 32, &brw->wm.surf_offset[unit]);
 646
 647    format = brw->render_target_format[rb_format];
 648    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 649       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 650                     __FUNCTION__, _mesa_get_format_name(rb_format));
 651    }
 652
 653    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 654               format << BRW_SURFACE_FORMAT_SHIFT);
 655
 656    /* reloc */
 657    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 658               region->bo->offset);
 659
 660    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 661               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 662
 663    surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
 664               (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 665
 666    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 667
 668    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 669    /* Note that the low bits of these fields are missing, so
 670     * there's the possibility of getting in trouble.
 671     */
 672    assert(tile_x % 4 == 0);
 673    assert(tile_y % 2 == 0);
 674    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 675               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 676               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 677
 678    if (intel->gen < 6) {
 679       /* _NEW_COLOR */
 680       if (!ctx->Color.ColorLogicOpEnabled &&
 681           (ctx->Color.BlendEnabled & (1 << unit)))
 682          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 683
 684       if (!ctx->Color.ColorMask[unit][0])
 685          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 686       if (!ctx->Color.ColorMask[unit][1])
 687          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 688       if (!ctx->Color.ColorMask[unit][2])
 689          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 690
 691       /* As mentioned above, disable writes to the alpha component when the
 692        * renderbuffer is XRGB.
 693        */
 694       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 695           !ctx->Color.ColorMask[unit][3]) {
 696          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 697       }
 698    }
 699
 700    drm_intel_bo_emit_reloc(brw->batch.bo,
 701                            brw->wm.surf_offset[unit] + 4,
 702                            region->bo,
 703                            surf[1] - region->bo->offset,
 704                            I915_GEM_DOMAIN_RENDER,
 705                            I915_GEM_DOMAIN_RENDER);
 706 }
 707
 708 /**
 709  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 710  */
 711 static void
 712 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 713 {
 714    struct gl_context *ctx = &brw->intel.ctx;
 715    GLuint i;
 716
 717    /* _NEW_BUFFERS | _NEW_COLOR */
 718    /* Update surfaces for drawing buffers */
 719    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 720       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 721          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 722             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 723                                                   ctx->DrawBuffer->Layered, i);
 724          } else {
 725             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 726          }
 727       }
 728    } else {
 729       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 730    }
 731    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 732 }
 733
 734 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 735    .dirty = {
 736       .mesa = (_NEW_COLOR |
 737                _NEW_BUFFERS),
 738       .brw = BRW_NEW_BATCH,
 739       .cache = 0
 740    },
 741    .emit = brw_update_renderbuffer_surfaces,
 742 };
 743
 744 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 745    .dirty = {
 746       .mesa = _NEW_BUFFERS,
 747       .brw = BRW_NEW_BATCH,
 748       .cache = 0
 749    },
 750    .emit = brw_update_renderbuffer_surfaces,
 751 };
 752
 753 /**
 754  * Construct SURFACE_STATE objects for enabled textures.
 755  */
 756 static void
 757 brw_update_texture_surfaces(struct brw_context *brw)
 758 {
 759    struct intel_context *intel = &brw->intel;
 760    struct gl_context *ctx = &intel->ctx;
 761
 762    /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
 763     * Unfortunately, we're stuck using the gl_program structs until the
 764     * ARB_fragment_program front-end gets converted to GLSL IR.  These
 765     * have the downside that SamplerUnits is split and only contains the
 766     * mappings for samplers active in that stage.
 767     */
 768    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 769    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 770
 771    unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);
 772
 773    for (unsigned s = 0; s < num_samplers; s++) {
 774       brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
 775       brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;
 776
 777       if (vs->SamplersUsed & (1 << s)) {
 778          const unsigned unit = vs->SamplerUnits[s];
 779
 780          /* _NEW_TEXTURE */
 781          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 782             brw->vtbl.update_texture_surface(ctx, unit,
 783                                              brw->vs.surf_offset,
 784                                              SURF_INDEX_VS_TEXTURE(s));
 785          }
 786       }
 787
 788       if (fs->SamplersUsed & (1 << s)) {
 789          const unsigned unit = fs->SamplerUnits[s];
 790
 791          /* _NEW_TEXTURE */
 792          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 793             brw->vtbl.update_texture_surface(ctx, unit,
 794                                              brw->wm.surf_offset,
 795                                              SURF_INDEX_TEXTURE(s));
 796          }
 797       }
 798    }
 799
 800    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 801 }
 802
 803 const struct brw_tracked_state brw_texture_surfaces = {
 804    .dirty = {
 805       .mesa = _NEW_TEXTURE,
 806       .brw = BRW_NEW_BATCH |
 807              BRW_NEW_VERTEX_PROGRAM |
 808              BRW_NEW_FRAGMENT_PROGRAM,
 809       .cache = 0
 810    },
 811    .emit = brw_update_texture_surfaces,
 812 };
 813
 814 void
 815 brw_upload_ubo_surfaces(struct brw_context *brw,
 816                         struct gl_shader *shader,
 817                         uint32_t *surf_offsets)
 818 {
 819    struct gl_context *ctx = &brw->intel.ctx;
 820
 821    if (!shader)
 822       return;
 823
 824    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 825       struct gl_uniform_buffer_binding *binding;
 826       struct intel_buffer_object *intel_bo;
 827
 828       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 829       intel_bo = intel_buffer_object(binding->BufferObject);
 830       drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
 831
 832       /* Because behavior for referencing outside of the binding's size in the
 833        * glBindBufferRange case is undefined, we can just bind the whole buffer
 834        * glBindBufferBase wants and be a correct implementation.
 835        */
 836       brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
 837                                         bo->size - binding->Offset,
 838                                         &surf_offsets[i],
 839                                         shader->Type == GL_FRAGMENT_SHADER);
 840    }
 841
 842    if (shader->NumUniformBlocks)
 843       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 844 }
 845
 846 static void
 847 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 848 {
 849    struct gl_context *ctx = &brw->intel.ctx;
 850    /* _NEW_PROGRAM */
 851    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 852
 853    if (!prog)
 854       return;
 855
 856    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 857                            &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
 858 }
 859
 860 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 861    .dirty = {
 862       .mesa = _NEW_PROGRAM,
 863       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 864       .cache = 0,
 865    },
 866    .emit = brw_upload_wm_ubo_surfaces,
 867 };
 868
 869 /**
 870  * Constructs the binding table for the WM surface state, which maps unit
 871  * numbers to surface state objects.
 872  */
 873 static void
 874 brw_upload_wm_binding_table(struct brw_context *brw)
 875 {
 876    uint32_t *bind;
 877    int i;
 878
 879    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 880       gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
 881    }
 882
 883    /* Might want to calculate nr_surfaces first, to avoid taking up so much
 884     * space for the binding table.
 885     */
 886    bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
 887                           sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
 888                           32, &brw->wm.bind_bo_offset);
 889
 890    /* BRW_NEW_SURFACES */
 891    for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
 892       bind[i] = brw->wm.surf_offset[i];
 893    }
 894
 895    brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
 896 }
 897
 898 const struct brw_tracked_state brw_wm_binding_table = {
 899    .dirty = {
 900       .mesa = 0,
 901       .brw = (BRW_NEW_BATCH |
 902               BRW_NEW_SURFACES),
 903       .cache = 0
 904    },
 905    .emit = brw_upload_wm_binding_table,
 906 };
 907
 908 void
 909 gen4_init_vtable_surface_functions(struct brw_context *brw)
 910 {
 911    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 912    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 913    brw->vtbl.update_null_renderbuffer_surface =
 914       brw_update_null_renderbuffer_surface;
 915    brw->vtbl.create_constant_surface = brw_create_constant_surface;
 916 }