i965: Define vtbl method that initializes an untyped R/W surface.
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */

#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      assert(0);
      return 0;
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

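/* None of the hardware programmed by this file supports more than 4x MSAA:
 * Gen4-5 have no multisampling at all, and Gen6 supports only 4x. Any
 * multisampled surface is therefore described as MULTISAMPLECOUNT_4.
 */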
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}


/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to 0.0.
    * Similarly, if the texture's format has no alpha channel, force the
    * alpha value read to 1.0. This allows the implementation to use an
    * RGBA texture for any of these formats without leaking any unexpected
    * values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

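   /* Compose the depth-mode/base-format swizzle computed above with the
    * texture's own swizzle state. For example, sampling a GL_DEPTH_COMPONENT
    * texture with DepthMode GL_LUMINANCE and the default XYZW swizzle
    * resolves each component through swizzles[], yielding (X, X, X, ONE).
    */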
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
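   /* The element count is split across the surface's width, height, and
    * depth fields: width holds bits 6:0, height bits 19:7, and depth
    * bits 26:20. For example, buffer_size = 0x123456 packs as
    * width = 0x56, height = 0x468, depth = 0x1.
    */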
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache. It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER, 0);
   }
}

static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   gl_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   gen4_emit_buffer_surface_state(brw, surf_offset, bo,
                                  tObj->BufferOffset,
                                  brw_format,
                                  size / texel_size,
                                  texel_size);
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_UNIFORM_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   (void) for_gather;   /* no w/a to apply for this gen */

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) <<
              BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface. Vertex/fragment shader constants will
 * be read from this buffer with Data Port Read instructions/messages.
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;

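   /* The constants are exposed as a buffer of R32G32B32A32_FLOAT elements.
    * A four-byte stride is used when the shader fetches individual floats
    * (the dword_pitch case); otherwise constants are fetched as 16-byte
    * vec4s.
    */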
   gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                  BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                  elements, stride);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the
       * buffer. Compute the number of additional transform feedback outputs
       * the buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
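   /* Worked example: a 1024-dword buffer with offset_dwords == 0,
    * num_vector_components == 4, and stride_dwords == 4 gives
    * buffer_size_minus_1 = (1024 - 0 - 4) / 4 = 255, i.e. entries 0..255
    * are addressable, exactly the 256 four-dword outputs that fit.
    */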
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index =
      brw->wm.prog_data->base.binding_table.pull_constants_start;
   float *constants;
   unsigned int i;

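   /* Refresh the values of any state-derived parameters (STATE_* entries in
    * the parameter list) so that the copy into the constant buffer below
    * snapshots the current GL state.
    */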
   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      if (brw->wm.base.const_bo) {
         drm_intel_bo_unreference(brw->wm.base.const_bo);
         brw->wm.base.const_bo = NULL;
         brw->wm.base.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   drm_intel_bo_unreference(brw->wm.base.const_bo);
   brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                              size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
   constants = brw->wm.base.const_bo->virtual;
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
                                     &brw->wm.base.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    * A null surface will be used in instances where an actual surface is
    * not bound. When a write message is generated to a null surface, no
    * actual surface is written to. When a read message (including any
    * sampling engine message) is generated to a null surface, the result
    * is all zeros. Note that a null surface type is allowed to be used
    * with all messages, even if it is not specifically indicated as
    * supported. All of the remaining fields in surface state are ignored
    * for null surfaces, with the following exceptions:
    *
    * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *   depth buffer's corresponding state for all render target surfaces,
    *   including null.
    *
    * - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling. So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer,
       * we set its pitch to 128 bytes (the width of a Y tile). This means
       * that the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
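      /* Worked example: a 1024x768 multisampled framebuffer gives
       * width_in_tiles = 64 and height_in_tiles = 48, so the workaround
       * buffer only needs (64 + 48 - 1) * 4096 bytes (~444 kB) rather than
       * the ~12 MB a fully-sized interleaved 4x color buffer would take.
       */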
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    * If Surface Type is SURFTYPE_NULL, this field must be TRUE.
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.base.surf_offset[surf_index] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image. So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so there's the
    * possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.base.surf_offset[surf_index] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(
               brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
               ctx->DrawBuffer->Layered, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};


static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;
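   /* A stage's binding table reserves separate ranges for regular texture
    * surfaces and for gather4 surfaces, so a texture used with
    * textureGather gets a second surface state; this is what lets the
    * surface format be overridden for gather4 messages only.
    */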

   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s,
                                             for_gather);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);

   /* Emit an alternate set of surface state for gather. This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (vs && vs->UsesGather)
      update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
   if (gs && gs->UsesGather)
      update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
   if (fs && fs->UsesGather)
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_UNIFORM_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_bo,
                                binding->Offset,
                                binding->BufferObject->Size - binding->Offset);

      /* Because behavior for referencing outside of the binding's size in
       * the glBindBufferRange case is undefined, we can just bind the whole
       * buffer that glBindBufferBase would and still be a correct
       * implementation.
       */
      brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                        bo->size - binding->Offset,
                                        &surf_offsets[i],
                                        shader->Type == GL_FRAGMENT_SHADER);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (!prog)
      return;

   /* CACHE_NEW_WM_PROG */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

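/* Install the Gen4-6 surface state implementations in the context's vtbl.
 * Later generations install their own implementations of these hooks from
 * their respective surface state files.
 */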
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.create_constant_surface = brw_create_constant_surface;
}