intel: Add a batch flush between front-buffer downsample and X protocol.
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keith@tungstengraphics.com>
  */


#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      assert(0);
      return 0;
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}
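
/* Interpretive note on the helper above: it only distinguishes single-sampled
 * from multisampled surfaces, and any num_samples > 1 is encoded as the 4x
 * setting.  That matches the assumption that 4x is the only multisample count
 * this legacy (gen4-6) SURFACE_STATE path needs to express; later generations
 * use a different surface-state layout elsewhere in the driver.
 */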


/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
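
/* A couple of worked examples of the function above (illustrative only):
 * a GL_ALPHA texture with the default swizzle state ends up as
 * (ZERO, ZERO, ZERO, X), i.e. alpha is read from the red channel of whatever
 * RGBA format backs the texture; a depth texture with DEPTH_TEXTURE_MODE
 * GL_LUMINANCE ends up as (X, X, X, ONE).
 */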


static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *binding_table,
                                  unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   uint32_t *surf;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
   gl_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   if (bo) {
      surf[1] = bo->offset; /* reloc */

      /* Emit relocation to surface contents. */
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              binding_table[surf_index] + 4,
                              bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);

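      /* The texel count minus one is split across the surface's Width,
       * Height, and Depth fields below: the low 7 bits go to Width, the
       * next 13 to Height, and the next 7 to Depth, for a 27-bit element
       * count overall.  Illustrative example: a 1000000-texel buffer gives
       * w = 999999 = 0xf423f, so Width = 0x3f, Height = 0x1e84, Depth = 0.
       */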
      int w = intel_obj->Base.Size / texel_size;
      surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
                 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
      surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
                 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
   } else {
      surf[1] = 0;
      surf[2] = 0;
      surf[3] = 0;
   }

   surf[4] = 0;
   surf[5] = 0;
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *binding_table,
                           unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;
   uint32_t tile_x, tile_y;

   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

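   /* Rough sketch of the six-dword gen4-6 SURFACE_STATE filled in below (as
    * written by this function): dword 0 holds the surface type, format and
    * miptree layout; dword 1 is the surface base address (a relocation);
    * dword 2 packs the LOD count, width and height; dword 3 packs tiling,
    * depth and pitch; dword 4 holds the multisample count; and dword 5
    * carries the intra-tile x/y offsets and the vertical alignment bit.
    */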
   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
   surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
                                             &tile_x, &tile_y);

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) <<
              BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           binding_table[surf_index] + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface. Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
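   /* Interpretive note (the authoritative definition is how the callers in
    * this file use it): dword_pitch selects the element size the shader will
    * read through this surface -- a 4-byte stride when the consumer fetches
    * individual dwords (the FS pull-constant path below passes true), or a
    * 16-byte stride when whole vec4s are fetched at a time.
    */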
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;
   const GLint w = elements - 1;
   uint32_t *surf;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, out_offset);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   surf[1] = bo->offset + offset; /* reloc */

   surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
              ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
              (stride - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache. It is mapped in hardware to the sampler cache."
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   uint32_t offset_bytes = 4 * offset_dwords;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
      BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
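
/* Illustrative example of the encoding above (not from the PRM): a transform
 * feedback output writing a vec4 with a 4-dword stride gets surface_format
 * R32G32B32A32_FLOAT and pitch_minus_1 = 4*4 - 1 = 15, and the remaining
 * room in the buffer, measured in whole outputs, is packed into the
 * Width/Height/Depth fields just like the buffer surfaces above.
 */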

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
   float *constants;
   unsigned int i;

   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      if (brw->wm.const_bo) {
         drm_intel_bo_unreference(brw->wm.const_bo);
         brw->wm.const_bo = NULL;
         brw->wm.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   drm_intel_bo_unreference(brw->wm.const_bo);
   brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                         size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
   constants = brw->wm.const_bo->virtual;
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
                                     &brw->wm.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};
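
/* General note on the state-atom convention used by this and the other
 * brw_tracked_state structs in this file (not something specific to pull
 * constants): the .dirty bits describe when .emit has to be re-run, i.e.
 * whenever any of the listed _NEW_* (core Mesa), BRW_NEW_* (driver) or
 * CACHE_NEW_* (program cache) flags are raised, the state upload loop calls
 * the .emit function again.
 */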

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer’s corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling. So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile). This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
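      /* Illustrative sizing example: for a 1920x1080 multisampled
       * framebuffer this gives width_in_tiles = 120 and height_in_tiles = 68,
       * so size_needed = (120 + 68 - 1) * 4096 = 765952 bytes, i.e. roughly
       * 750 KB instead of a full-size dummy color buffer.
       */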
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image. So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

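   /* The block below is gen4/5-only: on those parts there is no separate
    * BLEND_STATE structure, so blend enable and the per-channel color write
    * mask are carried in SURFACE_STATE dword 0 instead (gen6+ presumably
    * programs these through its own blend state elsewhere, which is why this
    * is guarded by brw->gen < 6).
    */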
   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is XRGB
       * (it has no alpha bits) or when alpha writes are masked off.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
                                                  ctx->DrawBuffer->Layered, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
    * Unfortunately, we're stuck using the gl_program structs until the
    * ARB_fragment_program front-end gets converted to GLSL IR. These
    * have the downside that SamplerUnits is split and only contains the
    * mappings for samplers active in that stage.
    */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

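   /* SamplersUsed is a bitmask of sampler indices referenced by each program,
    * so _mesa_fls() (find last set) of the union gives the highest used
    * sampler index plus one, which is all the loop below needs to cover both
    * stages.
    */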
   unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);

   for (unsigned s = 0; s < num_samplers; s++) {
      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
      brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;

      if (vs->SamplersUsed & (1 << s)) {
         const unsigned unit = vs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->vs.surf_offset,
                                             SURF_INDEX_VS_TEXTURE(s));
         }
      }

      if (fs->SamplersUsed & (1 << s)) {
         const unsigned unit = fs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->wm.surf_offset,
                                             SURF_INDEX_TEXTURE(s));
         }
      }
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        uint32_t *surf_offsets)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);

      /* Because behavior for referencing outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * (which is what glBindBufferBase wants anyway) and still be a correct
       * implementation.
       */
      brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                        bo->size - binding->Offset,
                                        &surf_offsets[i],
                                        shader->Type == GL_FRAGMENT_SHADER);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
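
/* Note on indexing (assuming SURF_INDEX_WM_UBO() indices are contiguous):
 * surf_offsets above is indexed by the shader's uniform block index, so with
 * the WM caller below passing &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)],
 * block i ends up at binding table slot SURF_INDEX_WM_UBO(i).
 */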

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (!prog)
      return;

   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = 0,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

/**
 * Constructs the binding table for the WM surface state, which maps unit
 * numbers to surface state objects.
 */
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
   uint32_t *bind;
   int i;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
   }

   /* CACHE_NEW_WM_PROG */
   unsigned entries = brw->wm.prog_data->binding_table_size;
   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                          sizeof(uint32_t) * entries,
                          32, &brw->wm.bind_bo_offset);

   /* BRW_NEW_SURFACES */
   for (i = 0; i < entries; i++) {
      bind[i] = brw->wm.surf_offset[i];
   }

   brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}
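
/* Sketch of what the table built above contains: each entry is just the byte
 * offset of a SURFACE_STATE that brw_state_batch() placed in the batch (an
 * offset relative to the surface state base address, which this driver points
 * at the batch buffer); hardware messages then reference surfaces by their
 * index into this table.
 */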

const struct brw_tracked_state brw_wm_binding_table = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_SURFACES),
      .cache = CACHE_NEW_WM_PROG
   },
   .emit = brw_upload_wm_binding_table,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.create_constant_surface = brw_create_constant_surface;
}