src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193 static void
 194 gen4_emit_buffer_surface_state(struct brw_context *brw,
 195                                uint32_t *out_offset,
 196                                drm_intel_bo *bo,
 197                                unsigned buffer_offset,
 198                                unsigned surface_format,
 199                                unsigned buffer_size,
 200                                unsigned pitch,
 201                                unsigned mocs,
 202                                bool rw)
 203 {
 204    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 205                                     6 * 4, 32, out_offset);
 206    memset(surf, 0, 6 * 4);
 207
 208    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 209              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 210              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 211    surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
 212    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 213              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 214    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 215              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 216
 217    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 218     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 219     * physical cache.  It is mapped in hardware to the sampler cache."
 220     */
 221    if (bo) {
 222       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 223                               bo, buffer_offset,
 224                               I915_GEM_DOMAIN_SAMPLER,
 225                               (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
 226    }
 227 }
 228
 229 void
 230 brw_update_buffer_texture_surface(struct gl_context *ctx,
 231                                   unsigned unit,
 232                                   uint32_t *surf_offset)
 233 {
 234    struct brw_context *brw = brw_context(ctx);
 235    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 236    struct intel_buffer_object *intel_obj =
 237       intel_buffer_object(tObj->BufferObject);
 238    uint32_t size = tObj->BufferSize;
 239    drm_intel_bo *bo = NULL;
 240    gl_format format = tObj->_BufferObjectFormat;
 241    uint32_t brw_format = brw_format_for_mesa_format(format);
 242    int texel_size = _mesa_get_format_bytes(format);
 243
 244    if (intel_obj) {
 245       size = MIN2(size, intel_obj->Base.Size);
 246       bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
 247    }
 248
 249    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 250       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 251                     _mesa_get_format_name(format));
 252    }
 253
 254    brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
 255                                        tObj->BufferOffset,
 256                                        brw_format,
 257                                        size / texel_size,
 258                                        texel_size,
 259                                        0, /* mocs */
 260                                        false /* rw */);
 261 }
 262
 263 static void
 264 brw_update_texture_surface(struct gl_context *ctx,
 265                            unsigned unit,
 266                            uint32_t *surf_offset,
 267                            bool for_gather)
 268 {
 269    struct brw_context *brw = brw_context(ctx);
 270    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 271    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 272    struct intel_mipmap_tree *mt = intelObj->mt;
 273    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 274    uint32_t *surf;
 275
 276    /* BRW_NEW_UNIFORM_BUFFER */
 277    if (tObj->Target == GL_TEXTURE_BUFFER) {
 278       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 279       return;
 280    }
 281
 282    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 283                           6 * 4, 32, surf_offset);
 284
 285    (void) for_gather;   /* no w/a to apply for this gen */
 286
 287    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 288               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 289               BRW_SURFACE_CUBEFACE_ENABLES |
 290               (translate_tex_format(brw,
 291                                     mt->format,
 292                                     sampler->sRGBDecode) <<
 293                BRW_SURFACE_FORMAT_SHIFT));
 294
 295    surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
 296
 297    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 298               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 299               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 300
 301    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 302               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 303               (intelObj->mt->region->pitch - 1) <<
 304               BRW_SURFACE_PITCH_SHIFT);
 305
 306    surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
 307               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 308
 309    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 310
 311    /* Emit relocation to surface contents */
 312    drm_intel_bo_emit_reloc(brw->batch.bo,
 313                            *surf_offset + 4,
 314                            intelObj->mt->region->bo,
 315                            surf[1] - intelObj->mt->region->bo->offset,
 316                            I915_GEM_DOMAIN_SAMPLER, 0);
 317 }
 318
 319 /**
 320  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 321  * read from this buffer with Data Port Read instructions/messages.
 322  */
 323 void
 324 brw_create_constant_surface(struct brw_context *brw,
 325                             drm_intel_bo *bo,
 326                             uint32_t offset,
 327                             uint32_t size,
 328                             uint32_t *out_offset,
 329                             bool dword_pitch)
 330 {
 331    uint32_t stride = dword_pitch ? 4 : 16;
 332    uint32_t elements = ALIGN(size, stride) / stride;
 333
 334    brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
 335                                        BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 336                                        elements, stride, 0, false);
 337 }
 338
 339 /**
 340  * Set up a binding table entry for use by stream output logic (transform
 341  * feedback).
 342  *
 343  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 344  */
 345 void
 346 brw_update_sol_surface(struct brw_context *brw,
 347                        struct gl_buffer_object *buffer_obj,
 348                        uint32_t *out_offset, unsigned num_vector_components,
 349                        unsigned stride_dwords, unsigned offset_dwords)
 350 {
 351    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 352    uint32_t offset_bytes = 4 * offset_dwords;
 353    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
 354                                              offset_bytes,
 355                                              buffer_obj->Size - offset_bytes);
 356    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 357                                     out_offset);
 358    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 359    size_t size_dwords = buffer_obj->Size / 4;
 360    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 361
 362    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 363     * too big to map using a single binding table entry?
 364     */
 365    assert((size_dwords - offset_dwords) / stride_dwords
 366           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 367
 368    if (size_dwords > offset_dwords + num_vector_components) {
 369       /* There is room for at least 1 transform feedback output in the buffer.
 370        * Compute the number of additional transform feedback outputs the
 371        * buffer has room for.
 372        */
 373       buffer_size_minus_1 =
 374          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 375    } else {
 376       /* There isn't even room for a single transform feedback output in the
 377        * buffer.  We can't configure the binding table entry to prevent output
 378        * entirely; we'll have to rely on the geometry shader to detect
 379        * overflow.  But to minimize the damage in case of a bug, set up the
 380        * binding table entry to just allow a single output.
 381        */
 382       buffer_size_minus_1 = 0;
 383    }
 384    width = buffer_size_minus_1 & 0x7f;
 385    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 386    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 387
 388    switch (num_vector_components) {
 389    case 1:
 390       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 391       break;
 392    case 2:
 393       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 394       break;
 395    case 3:
 396       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 397       break;
 398    case 4:
 399       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 400       break;
 401    default:
 402       assert(!"Invalid vector size for transform feedback output");
 403       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 404       break;
 405    }
 406
 407    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 408       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 409       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 410       BRW_SURFACE_RC_READ_WRITE;
 411    surf[1] = bo->offset + offset_bytes; /* reloc */
 412    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 413               height << BRW_SURFACE_HEIGHT_SHIFT);
 414    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 415               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 416    surf[4] = 0;
 417    surf[5] = 0;
 418
 419    /* Emit relocation to surface contents. */
 420    drm_intel_bo_emit_reloc(brw->batch.bo,
 421                            *out_offset + 4,
 422                            bo, offset_bytes,
 423                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 424 }
 425
 426 /* Creates a new WM constant buffer reflecting the current fragment program's
 427  * constants, if needed by the fragment program.
 428  *
 429  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 430  * state atom.
 431  */
 432 static void
 433 brw_upload_wm_pull_constants(struct brw_context *brw)
 434 {
 435    struct gl_context *ctx = &brw->ctx;
 436    /* BRW_NEW_FRAGMENT_PROGRAM */
 437    struct brw_fragment_program *fp =
 438       (struct brw_fragment_program *) brw->fragment_program;
 439    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 440    const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
 441    const int surf_index =
 442       brw->wm.prog_data->base.binding_table.pull_constants_start;
 443    float *constants;
 444    unsigned int i;
 445
 446    _mesa_load_state_parameters(ctx, params);
 447
 448    /* CACHE_NEW_WM_PROG */
 449    if (brw->wm.prog_data->nr_pull_params == 0) {
 450       if (brw->wm.base.const_bo) {
 451          drm_intel_bo_unreference(brw->wm.base.const_bo);
 452          brw->wm.base.const_bo = NULL;
 453          brw->wm.base.surf_offset[surf_index] = 0;
 454          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 455       }
 456       return;
 457    }
 458
 459    drm_intel_bo_unreference(brw->wm.base.const_bo);
 460    brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 461                                          size, 64);
 462
 463    /* _NEW_PROGRAM_CONSTANTS */
 464    drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
 465    constants = brw->wm.base.const_bo->virtual;
 466    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
 467       constants[i] = *brw->wm.prog_data->pull_param[i];
 468    }
 469    drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
 470
 471    brw_create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
 472                                &brw->wm.base.surf_offset[surf_index],
 473                                true);
 474
 475    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 476 }
 477
 478 const struct brw_tracked_state brw_wm_pull_constants = {
 479    .dirty = {
 480       .mesa = (_NEW_PROGRAM_CONSTANTS),
 481       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 482       .cache = CACHE_NEW_WM_PROG,
 483    },
 484    .emit = brw_upload_wm_pull_constants,
 485 };
 486
 487 static void
 488 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 489 {
 490    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 491     * Notes):
 492     *
 493     *     A null surface will be used in instances where an actual surface is
 494     *     not bound. When a write message is generated to a null surface, no
 495     *     actual surface is written to. When a read message (including any
 496     *     sampling engine message) is generated to a null surface, the result
 497     *     is all zeros. Note that a null surface type is allowed to be used
 498     *     with all messages, even if it is not specificially indicated as
 499     *     supported. All of the remaining fields in surface state are ignored
 500     *     for null surfaces, with the following exceptions:
 501     *
 502     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 503     *       depth buffer’s corresponding state for all render target surfaces,
 504     *       including null.
 505     *
 506     *     - Surface Format must be R8G8B8A8_UNORM.
 507     */
 508    struct gl_context *ctx = &brw->ctx;
 509    uint32_t *surf;
 510    unsigned surface_type = BRW_SURFACE_NULL;
 511    drm_intel_bo *bo = NULL;
 512    unsigned pitch_minus_1 = 0;
 513    uint32_t multisampling_state = 0;
 514    uint32_t surf_index =
 515       brw->wm.prog_data->binding_table.render_target_start + unit;
 516
 517    /* _NEW_BUFFERS */
 518    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 519
 520    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 521                           &brw->wm.base.surf_offset[surf_index]);
 522
 523    if (fb->Visual.samples > 1) {
 524       /* On Gen6, null render targets seem to cause GPU hangs when
 525        * multisampling.  So work around this problem by rendering into dummy
 526        * color buffer.
 527        *
 528        * To decrease the amount of memory needed by the workaround buffer, we
 529        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 530        * the amount of memory needed for the workaround buffer is
 531        * (width_in_tiles + height_in_tiles - 1) tiles.
 532        *
 533        * Note that since the workaround buffer will be interpreted by the
 534        * hardware as an interleaved multisampled buffer, we need to compute
 535        * width_in_tiles and height_in_tiles by dividing the width and height
 536        * by 16 rather than the normal Y-tile size of 32.
 537        */
 538       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 539       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 540       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 541       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 542                          size_needed);
 543       bo = brw->wm.multisampled_null_render_target_bo;
 544       surface_type = BRW_SURFACE_2D;
 545       pitch_minus_1 = 127;
 546       multisampling_state =
 547          brw_get_surface_num_multisamples(fb->Visual.samples);
 548    }
 549
 550    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 551               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 552    if (brw->gen < 6) {
 553       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 554                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 555                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 556                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 557    }
 558    surf[1] = bo ? bo->offset : 0;
 559    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 560               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 561
 562    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 563     * Notes):
 564     *
 565     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 566     */
 567    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 568               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 569    surf[4] = multisampling_state;
 570    surf[5] = 0;
 571
 572    if (bo) {
 573       drm_intel_bo_emit_reloc(brw->batch.bo,
 574                               brw->wm.base.surf_offset[surf_index] + 4,
 575                               bo, 0,
 576                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 577    }
 578 }
 579
 580 /**
 581  * Sets up a surface state structure to point at the given region.
 582  * While it is only used for the front/back buffer currently, it should be
 583  * usable for further buffers when doing ARB_draw_buffer support.
 584  */
 585 static void
 586 brw_update_renderbuffer_surface(struct brw_context *brw,
 587                                 struct gl_renderbuffer *rb,
 588                                 bool layered,
 589                                 unsigned int unit)
 590 {
 591    struct gl_context *ctx = &brw->ctx;
 592    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 593    struct intel_mipmap_tree *mt = irb->mt;
 594    struct intel_region *region;
 595    uint32_t *surf;
 596    uint32_t tile_x, tile_y;
 597    uint32_t format = 0;
 598    /* _NEW_BUFFERS */
 599    gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 600    uint32_t surf_index =
 601       brw->wm.prog_data->binding_table.render_target_start + unit;
 602
 603    assert(!layered);
 604
 605    if (rb->TexImage && !brw->has_surface_tile_offset) {
 606       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 607
 608       if (tile_x != 0 || tile_y != 0) {
 609          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 610           * destination in a miptree unless you actually setup your renderbuffer
 611           * as a miptree and used the fragile lod/array_index/etc. controls to
 612           * select the image.  So, instead, we just make a new single-level
 613           * miptree and render into that.
 614           */
 615          intel_renderbuffer_move_to_temp(brw, irb, false);
 616          mt = irb->mt;
 617       }
 618    }
 619
 620    intel_miptree_used_for_rendering(irb->mt);
 621
 622    region = irb->mt->region;
 623
 624    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 625                           &brw->wm.base.surf_offset[surf_index]);
 626
 627    format = brw->render_target_format[rb_format];
 628    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 629       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 630                     __FUNCTION__, _mesa_get_format_name(rb_format));
 631    }
 632
 633    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 634               format << BRW_SURFACE_FORMAT_SHIFT);
 635
 636    /* reloc */
 637    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 638               region->bo->offset);
 639
 640    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 641               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 642
 643    surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
 644               (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 645
 646    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 647
 648    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 649    /* Note that the low bits of these fields are missing, so
 650     * there's the possibility of getting in trouble.
 651     */
 652    assert(tile_x % 4 == 0);
 653    assert(tile_y % 2 == 0);
 654    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 655               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 656               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 657
 658    if (brw->gen < 6) {
 659       /* _NEW_COLOR */
 660       if (!ctx->Color.ColorLogicOpEnabled &&
 661           (ctx->Color.BlendEnabled & (1 << unit)))
 662          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 663
 664       if (!ctx->Color.ColorMask[unit][0])
 665          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 666       if (!ctx->Color.ColorMask[unit][1])
 667          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 668       if (!ctx->Color.ColorMask[unit][2])
 669          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 670
 671       /* As mentioned above, disable writes to the alpha component when the
 672        * renderbuffer is XRGB.
 673        */
 674       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 675           !ctx->Color.ColorMask[unit][3]) {
 676          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 677       }
 678    }
 679
 680    drm_intel_bo_emit_reloc(brw->batch.bo,
 681                            brw->wm.base.surf_offset[surf_index] + 4,
 682                            region->bo,
 683                            surf[1] - region->bo->offset,
 684                            I915_GEM_DOMAIN_RENDER,
 685                            I915_GEM_DOMAIN_RENDER);
 686 }
 687
 688 /**
 689  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 690  */
 691 static void
 692 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 693 {
 694    struct gl_context *ctx = &brw->ctx;
 695    GLuint i;
 696
 697    /* _NEW_BUFFERS | _NEW_COLOR */
 698    /* Update surfaces for drawing buffers */
 699    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 700       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 701          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 702             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 703                                                   ctx->DrawBuffer->MaxNumLayers > 0, i);
 704          } else {
 705             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 706          }
 707       }
 708    } else {
 709       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 710    }
 711    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 712 }
 713
 714 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 715    .dirty = {
 716       .mesa = (_NEW_COLOR |
 717                _NEW_BUFFERS),
 718       .brw = BRW_NEW_BATCH,
 719       .cache = 0
 720    },
 721    .emit = brw_update_renderbuffer_surfaces,
 722 };
 723
 724 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 725    .dirty = {
 726       .mesa = _NEW_BUFFERS,
 727       .brw = BRW_NEW_BATCH,
 728       .cache = 0
 729    },
 730    .emit = brw_update_renderbuffer_surfaces,
 731 };
 732
 733
 734 static void
 735 update_stage_texture_surfaces(struct brw_context *brw,
 736                               const struct gl_program *prog,
 737                               struct brw_stage_state *stage_state,
 738                               bool for_gather)
 739 {
 740    if (!prog)
 741       return;
 742
 743    struct gl_context *ctx = &brw->ctx;
 744
 745    uint32_t *surf_offset = stage_state->surf_offset;
 746    if (for_gather)
 747       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
 748    else
 749       surf_offset += stage_state->prog_data->binding_table.texture_start;
 750
 751    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 752    for (unsigned s = 0; s < num_samplers; s++) {
 753       surf_offset[s] = 0;
 754
 755       if (prog->SamplersUsed & (1 << s)) {
 756          const unsigned unit = prog->SamplerUnits[s];
 757
 758          /* _NEW_TEXTURE */
 759          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 760             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 761          }
 762       }
 763    }
 764 }
 765
 766
 767 /**
 768  * Construct SURFACE_STATE objects for enabled textures.
 769  */
 770 static void
 771 brw_update_texture_surfaces(struct brw_context *brw)
 772 {
 773    /* BRW_NEW_VERTEX_PROGRAM */
 774    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 775
 776    /* BRW_NEW_GEOMETRY_PROGRAM */
 777    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 778
 779    /* BRW_NEW_FRAGMENT_PROGRAM */
 780    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 781
 782    /* _NEW_TEXTURE */
 783    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
 784    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
 785    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
 786
 787    /* emit alternate set of surface state for gather. this
 788     * allows the surface format to be overriden for only the
 789     * gather4 messages. */
 790    if (vs && vs->UsesGather)
 791       update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
 792    if (gs && gs->UsesGather)
 793       update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
 794    if (fs && fs->UsesGather)
 795       update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
 796
 797    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 798 }
 799
 800 const struct brw_tracked_state brw_texture_surfaces = {
 801    .dirty = {
 802       .mesa = _NEW_TEXTURE,
 803       .brw = BRW_NEW_BATCH |
 804              BRW_NEW_UNIFORM_BUFFER |
 805              BRW_NEW_VERTEX_PROGRAM |
 806              BRW_NEW_GEOMETRY_PROGRAM |
 807              BRW_NEW_FRAGMENT_PROGRAM,
 808       .cache = 0
 809    },
 810    .emit = brw_update_texture_surfaces,
 811 };
 812
 813 void
 814 brw_upload_ubo_surfaces(struct brw_context *brw,
 815                         struct gl_shader *shader,
 816                         struct brw_stage_state *stage_state,
 817                         struct brw_stage_prog_data *prog_data)
 818 {
 819    struct gl_context *ctx = &brw->ctx;
 820
 821    if (!shader)
 822       return;
 823
 824    uint32_t *surf_offsets =
 825       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 826
 827    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 828       struct gl_uniform_buffer_binding *binding;
 829       struct intel_buffer_object *intel_bo;
 830
 831       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 832       intel_bo = intel_buffer_object(binding->BufferObject);
 833       drm_intel_bo *bo =
 834          intel_bufferobj_buffer(brw, intel_bo,
 835                                 binding->Offset,
 836                                 binding->BufferObject->Size - binding->Offset);
 837
 838       /* Because behavior for referencing outside of the binding's size in the
 839        * glBindBufferRange case is undefined, we can just bind the whole buffer
 840        * glBindBufferBase wants and be a correct implementation.
 841        */
 842       brw_create_constant_surface(brw, bo, binding->Offset,
 843                                   bo->size - binding->Offset,
 844                                   &surf_offsets[i],
 845                                   shader->Stage == MESA_SHADER_FRAGMENT);
 846    }
 847
 848    if (shader->NumUniformBlocks)
 849       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 850 }
 851
 852 static void
 853 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 854 {
 855    struct gl_context *ctx = &brw->ctx;
 856    /* _NEW_PROGRAM */
 857    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 858
 859    if (!prog)
 860       return;
 861
 862    /* CACHE_NEW_WM_PROG */
 863    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 864                            &brw->wm.base, &brw->wm.prog_data->base);
 865 }
 866
 867 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 868    .dirty = {
 869       .mesa = _NEW_PROGRAM,
 870       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 871       .cache = CACHE_NEW_WM_PROG,
 872    },
 873    .emit = brw_upload_wm_ubo_surfaces,
 874 };
 875
 876 void
 877 brw_upload_abo_surfaces(struct brw_context *brw,
 878                         struct gl_shader_program *prog,
 879                         struct brw_stage_state *stage_state,
 880                         struct brw_stage_prog_data *prog_data)
 881 {
 882    struct gl_context *ctx = &brw->ctx;
 883    uint32_t *surf_offsets =
 884       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 885
 886    for (int i = 0; i < prog->NumAtomicBuffers; i++) {
 887       struct gl_atomic_buffer_binding *binding =
 888          &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
 889       struct intel_buffer_object *intel_bo =
 890          intel_buffer_object(binding->BufferObject);
 891       drm_intel_bo *bo = intel_bufferobj_buffer(
 892          brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
 893
 894       brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
 895                                    bo->size - binding->Offset,
 896                                    &surf_offsets[i], true);
 897    }
 898
 899    if (prog->NumUniformBlocks)
 900       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 901 }
 902
 903 static void
 904 brw_upload_wm_abo_surfaces(struct brw_context *brw)
 905 {
 906    struct gl_context *ctx = &brw->ctx;
 907    /* _NEW_PROGRAM */
 908    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 909
 910    if (prog) {
 911       /* CACHE_NEW_WM_PROG */
 912       brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
 913                               &brw->wm.prog_data->base);
 914    }
 915 }
 916
 917 const struct brw_tracked_state brw_wm_abo_surfaces = {
 918    .dirty = {
 919       .mesa = _NEW_PROGRAM,
 920       .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
 921       .cache = CACHE_NEW_WM_PROG,
 922    },
 923    .emit = brw_upload_wm_abo_surfaces,
 924 };
 925
 926 void
 927 gen4_init_vtable_surface_functions(struct brw_context *brw)
 928 {
 929    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 930    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 931    brw->vtbl.update_null_renderbuffer_surface =
 932       brw_update_null_renderbuffer_surface;
 933    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 934 }