src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193 static void
 194 gen4_emit_buffer_surface_state(struct brw_context *brw,
 195                                uint32_t *out_offset,
 196                                drm_intel_bo *bo,
 197                                unsigned buffer_offset,
 198                                unsigned surface_format,
 199                                unsigned buffer_size,
 200                                unsigned pitch)
 201 {
 202    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 203                                     6 * 4, 32, out_offset);
 204    memset(surf, 0, 6 * 4);
 205
 206    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 207              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 208              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 209    surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
 210    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 211              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 212    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 213              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 214
 215    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 216     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 217     * physical cache.  It is mapped in hardware to the sampler cache."
 218     */
 219    if (bo) {
 220       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 221                               bo, buffer_offset,
 222                               I915_GEM_DOMAIN_SAMPLER, 0);
 223    }
 224 }
 225
 226 static void
 227 brw_update_buffer_texture_surface(struct gl_context *ctx,
 228                                   unsigned unit,
 229                                   uint32_t *surf_offset)
 230 {
 231    struct brw_context *brw = brw_context(ctx);
 232    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 233    struct intel_buffer_object *intel_obj =
 234       intel_buffer_object(tObj->BufferObject);
 235    drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
 236    gl_format format = tObj->_BufferObjectFormat;
 237    uint32_t brw_format = brw_format_for_mesa_format(format);
 238    int texel_size = _mesa_get_format_bytes(format);
 239    int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
 240
 241    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 242       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 243                     _mesa_get_format_name(format));
 244    }
 245
 246    gen4_emit_buffer_surface_state(brw, surf_offset, bo, 0,
 247                                   brw_format,
 248                                   w, texel_size);
 249 }
 250
 251 static void
 252 brw_update_texture_surface(struct gl_context *ctx,
 253                            unsigned unit,
 254                            uint32_t *surf_offset)
 255 {
 256    struct brw_context *brw = brw_context(ctx);
 257    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 258    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 259    struct intel_mipmap_tree *mt = intelObj->mt;
 260    struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
 261    struct intel_texture_image *intel_image = intel_texture_image(firstImage);
 262    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 263    uint32_t *surf;
 264
 265    if (tObj->Target == GL_TEXTURE_BUFFER) {
 266       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 267       return;
 268    }
 269
 270    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 271                           6 * 4, 32, surf_offset);
 272
 273    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 274               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 275               BRW_SURFACE_CUBEFACE_ENABLES |
 276               (translate_tex_format(brw,
 277                                     mt->format,
 278                                     tObj->DepthMode,
 279                                     sampler->sRGBDecode) <<
 280                BRW_SURFACE_FORMAT_SHIFT));
 281
 282    surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
 283
 284    surf[2] = ((intelObj->_MaxLevel - intel_image->mt->first_level) << BRW_SURFACE_LOD_SHIFT |
 285               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 286               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 287
 288    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 289               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 290               (intelObj->mt->region->pitch - 1) <<
 291               BRW_SURFACE_PITCH_SHIFT);
 292
 293    surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
 294
 295    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 296
 297    /* Emit relocation to surface contents */
 298    drm_intel_bo_emit_reloc(brw->batch.bo,
 299                            *surf_offset + 4,
 300                            intelObj->mt->region->bo,
 301                            surf[1] - intelObj->mt->region->bo->offset,
 302                            I915_GEM_DOMAIN_SAMPLER, 0);
 303 }
 304
 305 /**
 306  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 307  * read from this buffer with Data Port Read instructions/messages.
 308  */
 309 static void
 310 brw_create_constant_surface(struct brw_context *brw,
 311                             drm_intel_bo *bo,
 312                             uint32_t offset,
 313                             uint32_t size,
 314                             uint32_t *out_offset,
 315                             bool dword_pitch)
 316 {
 317    uint32_t stride = dword_pitch ? 4 : 16;
 318    uint32_t elements = ALIGN(size, stride) / stride;
 319
 320    gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
 321                                   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 322                                   elements, stride);
 323 }
 324
 325 /**
 326  * Set up a binding table entry for use by stream output logic (transform
 327  * feedback).
 328  *
 329  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 330  */
 331 void
 332 brw_update_sol_surface(struct brw_context *brw,
 333                        struct gl_buffer_object *buffer_obj,
 334                        uint32_t *out_offset, unsigned num_vector_components,
 335                        unsigned stride_dwords, unsigned offset_dwords)
 336 {
 337    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 338    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
 339    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 340                                     out_offset);
 341    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 342    uint32_t offset_bytes = 4 * offset_dwords;
 343    size_t size_dwords = buffer_obj->Size / 4;
 344    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 345
 346    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 347     * too big to map using a single binding table entry?
 348     */
 349    assert((size_dwords - offset_dwords) / stride_dwords
 350           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 351
 352    if (size_dwords > offset_dwords + num_vector_components) {
 353       /* There is room for at least 1 transform feedback output in the buffer.
 354        * Compute the number of additional transform feedback outputs the
 355        * buffer has room for.
 356        */
 357       buffer_size_minus_1 =
 358          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 359    } else {
 360       /* There isn't even room for a single transform feedback output in the
 361        * buffer.  We can't configure the binding table entry to prevent output
 362        * entirely; we'll have to rely on the geometry shader to detect
 363        * overflow.  But to minimize the damage in case of a bug, set up the
 364        * binding table entry to just allow a single output.
 365        */
 366       buffer_size_minus_1 = 0;
 367    }
 368    width = buffer_size_minus_1 & 0x7f;
 369    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 370    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 371
 372    switch (num_vector_components) {
 373    case 1:
 374       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 375       break;
 376    case 2:
 377       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 378       break;
 379    case 3:
 380       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 381       break;
 382    case 4:
 383       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 384       break;
 385    default:
 386       assert(!"Invalid vector size for transform feedback output");
 387       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 388       break;
 389    }
 390
 391    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 392       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 393       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 394       BRW_SURFACE_RC_READ_WRITE;
 395    surf[1] = bo->offset + offset_bytes; /* reloc */
 396    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 397               height << BRW_SURFACE_HEIGHT_SHIFT);
 398    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 399               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 400    surf[4] = 0;
 401    surf[5] = 0;
 402
 403    /* Emit relocation to surface contents. */
 404    drm_intel_bo_emit_reloc(brw->batch.bo,
 405                            *out_offset + 4,
 406                            bo, offset_bytes,
 407                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 408 }
 409
 410 /* Creates a new WM constant buffer reflecting the current fragment program's
 411  * constants, if needed by the fragment program.
 412  *
 413  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 414  * state atom.
 415  */
 416 static void
 417 brw_upload_wm_pull_constants(struct brw_context *brw)
 418 {
 419    struct gl_context *ctx = &brw->ctx;
 420    /* BRW_NEW_FRAGMENT_PROGRAM */
 421    struct brw_fragment_program *fp =
 422       (struct brw_fragment_program *) brw->fragment_program;
 423    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 424    const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
 425    const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
 426    float *constants;
 427    unsigned int i;
 428
 429    _mesa_load_state_parameters(ctx, params);
 430
 431    /* CACHE_NEW_WM_PROG */
 432    if (brw->wm.prog_data->nr_pull_params == 0) {
 433       if (brw->wm.base.const_bo) {
 434          drm_intel_bo_unreference(brw->wm.base.const_bo);
 435          brw->wm.base.const_bo = NULL;
 436          brw->wm.base.surf_offset[surf_index] = 0;
 437          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 438       }
 439       return;
 440    }
 441
 442    drm_intel_bo_unreference(brw->wm.base.const_bo);
 443    brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 444                                          size, 64);
 445
 446    /* _NEW_PROGRAM_CONSTANTS */
 447    drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
 448    constants = brw->wm.base.const_bo->virtual;
 449    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
 450       constants[i] = *brw->wm.prog_data->pull_param[i];
 451    }
 452    drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
 453
 454    brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
 455                                      &brw->wm.base.surf_offset[surf_index],
 456                                      true);
 457
 458    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 459 }
 460
 461 const struct brw_tracked_state brw_wm_pull_constants = {
 462    .dirty = {
 463       .mesa = (_NEW_PROGRAM_CONSTANTS),
 464       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 465       .cache = CACHE_NEW_WM_PROG,
 466    },
 467    .emit = brw_upload_wm_pull_constants,
 468 };
 469
 470 static void
 471 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 472 {
 473    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 474     * Notes):
 475     *
 476     *     A null surface will be used in instances where an actual surface is
 477     *     not bound. When a write message is generated to a null surface, no
 478     *     actual surface is written to. When a read message (including any
 479     *     sampling engine message) is generated to a null surface, the result
 480     *     is all zeros. Note that a null surface type is allowed to be used
 481     *     with all messages, even if it is not specificially indicated as
 482     *     supported. All of the remaining fields in surface state are ignored
 483     *     for null surfaces, with the following exceptions:
 484     *
 485     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 486     *       depth buffer’s corresponding state for all render target surfaces,
 487     *       including null.
 488     *
 489     *     - Surface Format must be R8G8B8A8_UNORM.
 490     */
 491    struct gl_context *ctx = &brw->ctx;
 492    uint32_t *surf;
 493    unsigned surface_type = BRW_SURFACE_NULL;
 494    drm_intel_bo *bo = NULL;
 495    unsigned pitch_minus_1 = 0;
 496    uint32_t multisampling_state = 0;
 497
 498    /* _NEW_BUFFERS */
 499    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 500
 501    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 502                           &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
 503
 504    if (fb->Visual.samples > 1) {
 505       /* On Gen6, null render targets seem to cause GPU hangs when
 506        * multisampling.  So work around this problem by rendering into dummy
 507        * color buffer.
 508        *
 509        * To decrease the amount of memory needed by the workaround buffer, we
 510        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 511        * the amount of memory needed for the workaround buffer is
 512        * (width_in_tiles + height_in_tiles - 1) tiles.
 513        *
 514        * Note that since the workaround buffer will be interpreted by the
 515        * hardware as an interleaved multisampled buffer, we need to compute
 516        * width_in_tiles and height_in_tiles by dividing the width and height
 517        * by 16 rather than the normal Y-tile size of 32.
 518        */
 519       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 520       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 521       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 522       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 523                          size_needed);
 524       bo = brw->wm.multisampled_null_render_target_bo;
 525       surface_type = BRW_SURFACE_2D;
 526       pitch_minus_1 = 127;
 527       multisampling_state =
 528          brw_get_surface_num_multisamples(fb->Visual.samples);
 529    }
 530
 531    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 532               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 533    if (brw->gen < 6) {
 534       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 535                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 536                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 537                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 538    }
 539    surf[1] = bo ? bo->offset : 0;
 540    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 541               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 542
 543    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 544     * Notes):
 545     *
 546     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 547     */
 548    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 549               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 550    surf[4] = multisampling_state;
 551    surf[5] = 0;
 552
 553    if (bo) {
 554       drm_intel_bo_emit_reloc(brw->batch.bo,
 555                               brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
 556                               bo, 0,
 557                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 558    }
 559 }
 560
 561 /**
 562  * Sets up a surface state structure to point at the given region.
 563  * While it is only used for the front/back buffer currently, it should be
 564  * usable for further buffers when doing ARB_draw_buffer support.
 565  */
 566 static void
 567 brw_update_renderbuffer_surface(struct brw_context *brw,
 568                                 struct gl_renderbuffer *rb,
 569                                 bool layered,
 570                                 unsigned int unit)
 571 {
 572    struct gl_context *ctx = &brw->ctx;
 573    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 574    struct intel_mipmap_tree *mt = irb->mt;
 575    struct intel_region *region;
 576    uint32_t *surf;
 577    uint32_t tile_x, tile_y;
 578    uint32_t format = 0;
 579    /* _NEW_BUFFERS */
 580    gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 581
 582    assert(!layered);
 583
 584    if (rb->TexImage && !brw->has_surface_tile_offset) {
 585       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 586
 587       if (tile_x != 0 || tile_y != 0) {
 588          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 589           * destination in a miptree unless you actually setup your renderbuffer
 590           * as a miptree and used the fragile lod/array_index/etc. controls to
 591           * select the image.  So, instead, we just make a new single-level
 592           * miptree and render into that.
 593           */
 594          intel_renderbuffer_move_to_temp(brw, irb, false);
 595          mt = irb->mt;
 596       }
 597    }
 598
 599    intel_miptree_used_for_rendering(irb->mt);
 600
 601    region = irb->mt->region;
 602
 603    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 604                           &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
 605
 606    format = brw->render_target_format[rb_format];
 607    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 608       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 609                     __FUNCTION__, _mesa_get_format_name(rb_format));
 610    }
 611
 612    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 613               format << BRW_SURFACE_FORMAT_SHIFT);
 614
 615    /* reloc */
 616    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 617               region->bo->offset);
 618
 619    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 620               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 621
 622    surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
 623               (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 624
 625    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 626
 627    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 628    /* Note that the low bits of these fields are missing, so
 629     * there's the possibility of getting in trouble.
 630     */
 631    assert(tile_x % 4 == 0);
 632    assert(tile_y % 2 == 0);
 633    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 634               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 635               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 636
 637    if (brw->gen < 6) {
 638       /* _NEW_COLOR */
 639       if (!ctx->Color.ColorLogicOpEnabled &&
 640           (ctx->Color.BlendEnabled & (1 << unit)))
 641          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 642
 643       if (!ctx->Color.ColorMask[unit][0])
 644          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 645       if (!ctx->Color.ColorMask[unit][1])
 646          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 647       if (!ctx->Color.ColorMask[unit][2])
 648          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 649
 650       /* As mentioned above, disable writes to the alpha component when the
 651        * renderbuffer is XRGB.
 652        */
 653       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 654           !ctx->Color.ColorMask[unit][3]) {
 655          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 656       }
 657    }
 658
 659    drm_intel_bo_emit_reloc(brw->batch.bo,
 660                            brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
 661                            region->bo,
 662                            surf[1] - region->bo->offset,
 663                            I915_GEM_DOMAIN_RENDER,
 664                            I915_GEM_DOMAIN_RENDER);
 665 }
 666
 667 /**
 668  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 669  */
 670 static void
 671 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 672 {
 673    struct gl_context *ctx = &brw->ctx;
 674    GLuint i;
 675
 676    /* _NEW_BUFFERS | _NEW_COLOR */
 677    /* Update surfaces for drawing buffers */
 678    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 679       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 680          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 681             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 682                                                   ctx->DrawBuffer->Layered, i);
 683          } else {
 684             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 685          }
 686       }
 687    } else {
 688       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 689    }
 690    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 691 }
 692
 693 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 694    .dirty = {
 695       .mesa = (_NEW_COLOR |
 696                _NEW_BUFFERS),
 697       .brw = BRW_NEW_BATCH,
 698       .cache = 0
 699    },
 700    .emit = brw_update_renderbuffer_surfaces,
 701 };
 702
 703 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 704    .dirty = {
 705       .mesa = _NEW_BUFFERS,
 706       .brw = BRW_NEW_BATCH,
 707       .cache = 0
 708    },
 709    .emit = brw_update_renderbuffer_surfaces,
 710 };
 711
 712
 713 static void
 714 update_stage_texture_surfaces(struct brw_context *brw,
 715                               const struct gl_program *prog,
 716                               uint32_t *surf_offset)
 717 {
 718    if (!prog)
 719       return;
 720
 721    struct gl_context *ctx = &brw->ctx;
 722
 723    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 724
 725    for (unsigned s = 0; s < num_samplers; s++) {
 726       surf_offset[s] = 0;
 727
 728       if (prog->SamplersUsed & (1 << s)) {
 729          const unsigned unit = prog->SamplerUnits[s];
 730
 731          /* _NEW_TEXTURE */
 732          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 733             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s);
 734          }
 735       }
 736    }
 737 }
 738
 739
 740 /**
 741  * Construct SURFACE_STATE objects for enabled textures.
 742  */
 743 static void
 744 brw_update_texture_surfaces(struct brw_context *brw)
 745 {
 746    /* BRW_NEW_VERTEX_PROGRAM */
 747    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 748
 749    /* BRW_NEW_GEOMETRY_PROGRAM */
 750    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 751
 752    /* BRW_NEW_FRAGMENT_PROGRAM */
 753    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 754
 755    /* _NEW_TEXTURE */
 756    update_stage_texture_surfaces(brw, vs,
 757                                  brw->vs.base.surf_offset +
 758                                  SURF_INDEX_VEC4_TEXTURE(0));
 759    update_stage_texture_surfaces(brw, gs,
 760                                  brw->gs.base.surf_offset +
 761                                  SURF_INDEX_VEC4_TEXTURE(0));
 762    update_stage_texture_surfaces(brw, fs,
 763                                  brw->wm.base.surf_offset +
 764                                  SURF_INDEX_TEXTURE(0));
 765
 766    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 767 }
 768
 769 const struct brw_tracked_state brw_texture_surfaces = {
 770    .dirty = {
 771       .mesa = _NEW_TEXTURE,
 772       .brw = BRW_NEW_BATCH |
 773              BRW_NEW_VERTEX_PROGRAM |
 774              BRW_NEW_GEOMETRY_PROGRAM |
 775              BRW_NEW_FRAGMENT_PROGRAM,
 776       .cache = 0
 777    },
 778    .emit = brw_update_texture_surfaces,
 779 };
 780
 781 void
 782 brw_upload_ubo_surfaces(struct brw_context *brw,
 783                         struct gl_shader *shader,
 784                         uint32_t *surf_offsets)
 785 {
 786    struct gl_context *ctx = &brw->ctx;
 787
 788    if (!shader)
 789       return;
 790
 791    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 792       struct gl_uniform_buffer_binding *binding;
 793       struct intel_buffer_object *intel_bo;
 794
 795       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 796       intel_bo = intel_buffer_object(binding->BufferObject);
 797       drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
 798
 799       /* Because behavior for referencing outside of the binding's size in the
 800        * glBindBufferRange case is undefined, we can just bind the whole buffer
 801        * glBindBufferBase wants and be a correct implementation.
 802        */
 803       brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
 804                                         bo->size - binding->Offset,
 805                                         &surf_offsets[i],
 806                                         shader->Type == GL_FRAGMENT_SHADER);
 807    }
 808
 809    if (shader->NumUniformBlocks)
 810       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 811 }
 812
 813 static void
 814 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 815 {
 816    struct gl_context *ctx = &brw->ctx;
 817    /* _NEW_PROGRAM */
 818    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 819
 820    if (!prog)
 821       return;
 822
 823    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 824                            &brw->wm.base.surf_offset[SURF_INDEX_WM_UBO(0)]);
 825 }
 826
 827 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 828    .dirty = {
 829       .mesa = _NEW_PROGRAM,
 830       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 831       .cache = 0,
 832    },
 833    .emit = brw_upload_wm_ubo_surfaces,
 834 };
 835
 836 void
 837 gen4_init_vtable_surface_functions(struct brw_context *brw)
 838 {
 839    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 840    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 841    brw->vtbl.update_null_renderbuffer_surface =
 842       brw_update_null_renderbuffer_surface;
 843    brw->vtbl.create_constant_surface = brw_create_constant_surface;
 844 }