src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193
 194 static void
 195 brw_update_buffer_texture_surface(struct gl_context *ctx,
 196                                   unsigned unit,
 197                                   uint32_t *surf_offset)
 198 {
 199    struct brw_context *brw = brw_context(ctx);
 200    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 201    uint32_t *surf;
 202    struct intel_buffer_object *intel_obj =
 203       intel_buffer_object(tObj->BufferObject);
 204    drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
 205    gl_format format = tObj->_BufferObjectFormat;
 206    uint32_t brw_format = brw_format_for_mesa_format(format);
 207    int texel_size = _mesa_get_format_bytes(format);
 208
 209    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 210       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 211                     _mesa_get_format_name(format));
 212    }
 213
 214    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 215                           6 * 4, 32, surf_offset);
 216
 217    surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 218               (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));
 219
 220    if (brw->gen >= 6)
 221       surf[0] |= BRW_SURFACE_RC_READ_WRITE;
 222
 223    if (bo) {
 224       surf[1] = bo->offset; /* reloc */
 225
 226       /* Emit relocation to surface contents. */
 227       drm_intel_bo_emit_reloc(brw->batch.bo,
 228                               *surf_offset + 4,
 229                               bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
 230
 231       int w = intel_obj->Base.Size / texel_size;
 232       surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 233                  ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
 234       surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 235                  (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
 236    } else {
 237       surf[1] = 0;
 238       surf[2] = 0;
 239       surf[3] = 0;
 240    }
 241
 242    surf[4] = 0;
 243    surf[5] = 0;
 244 }
 245
 246 static void
 247 brw_update_texture_surface(struct gl_context *ctx,
 248                            unsigned unit,
 249                            uint32_t *surf_offset)
 250 {
 251    struct brw_context *brw = brw_context(ctx);
 252    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 253    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 254    struct intel_mipmap_tree *mt = intelObj->mt;
 255    struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
 256    struct intel_texture_image *intel_image = intel_texture_image(firstImage);
 257    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 258    uint32_t *surf;
 259
 260    if (tObj->Target == GL_TEXTURE_BUFFER) {
 261       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 262       return;
 263    }
 264
 265    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 266                           6 * 4, 32, surf_offset);
 267
 268    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 269               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 270               BRW_SURFACE_CUBEFACE_ENABLES |
 271               (translate_tex_format(brw,
 272                                     mt->format,
 273                                     tObj->DepthMode,
 274                                     sampler->sRGBDecode) <<
 275                BRW_SURFACE_FORMAT_SHIFT));
 276
 277    surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
 278
 279    surf[2] = ((intelObj->_MaxLevel - intel_image->mt->first_level) << BRW_SURFACE_LOD_SHIFT |
 280               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 281               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 282
 283    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 284               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 285               (intelObj->mt->region->pitch - 1) <<
 286               BRW_SURFACE_PITCH_SHIFT);
 287
 288    surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
 289
 290    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 291
 292    /* Emit relocation to surface contents */
 293    drm_intel_bo_emit_reloc(brw->batch.bo,
 294                            *surf_offset + 4,
 295                            intelObj->mt->region->bo,
 296                            surf[1] - intelObj->mt->region->bo->offset,
 297                            I915_GEM_DOMAIN_SAMPLER, 0);
 298 }
 299
 300 /**
 301  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 302  * read from this buffer with Data Port Read instructions/messages.
 303  */
 304 static void
 305 brw_create_constant_surface(struct brw_context *brw,
 306                             drm_intel_bo *bo,
 307                             uint32_t offset,
 308                             uint32_t size,
 309                             uint32_t *out_offset,
 310                             bool dword_pitch)
 311 {
 312    uint32_t stride = dword_pitch ? 4 : 16;
 313    uint32_t elements = ALIGN(size, stride) / stride;
 314    const GLint w = elements - 1;
 315    uint32_t *surf;
 316
 317    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 318                           6 * 4, 32, out_offset);
 319
 320    surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 321               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 322               BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
 323
 324    if (brw->gen >= 6)
 325       surf[0] |= BRW_SURFACE_RC_READ_WRITE;
 326
 327    surf[1] = bo->offset + offset; /* reloc */
 328
 329    surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 330               ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
 331
 332    surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 333               (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
 334
 335    surf[4] = 0;
 336    surf[5] = 0;
 337
 338    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 339     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 340     * physical cache.  It is mapped in hardware to the sampler cache."
 341     */
 342    drm_intel_bo_emit_reloc(brw->batch.bo,
 343                            *out_offset + 4,
 344                            bo, offset,
 345                            I915_GEM_DOMAIN_SAMPLER, 0);
 346 }
 347
 348 /**
 349  * Set up a binding table entry for use by stream output logic (transform
 350  * feedback).
 351  *
 352  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 353  */
 354 void
 355 brw_update_sol_surface(struct brw_context *brw,
 356                        struct gl_buffer_object *buffer_obj,
 357                        uint32_t *out_offset, unsigned num_vector_components,
 358                        unsigned stride_dwords, unsigned offset_dwords)
 359 {
 360    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 361    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
 362    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 363                                     out_offset);
 364    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 365    uint32_t offset_bytes = 4 * offset_dwords;
 366    size_t size_dwords = buffer_obj->Size / 4;
 367    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 368
 369    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 370     * too big to map using a single binding table entry?
 371     */
 372    assert((size_dwords - offset_dwords) / stride_dwords
 373           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 374
 375    if (size_dwords > offset_dwords + num_vector_components) {
 376       /* There is room for at least 1 transform feedback output in the buffer.
 377        * Compute the number of additional transform feedback outputs the
 378        * buffer has room for.
 379        */
 380       buffer_size_minus_1 =
 381          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 382    } else {
 383       /* There isn't even room for a single transform feedback output in the
 384        * buffer.  We can't configure the binding table entry to prevent output
 385        * entirely; we'll have to rely on the geometry shader to detect
 386        * overflow.  But to minimize the damage in case of a bug, set up the
 387        * binding table entry to just allow a single output.
 388        */
 389       buffer_size_minus_1 = 0;
 390    }
 391    width = buffer_size_minus_1 & 0x7f;
 392    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 393    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 394
 395    switch (num_vector_components) {
 396    case 1:
 397       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 398       break;
 399    case 2:
 400       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 401       break;
 402    case 3:
 403       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 404       break;
 405    case 4:
 406       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 407       break;
 408    default:
 409       assert(!"Invalid vector size for transform feedback output");
 410       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 411       break;
 412    }
 413
 414    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 415       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 416       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 417       BRW_SURFACE_RC_READ_WRITE;
 418    surf[1] = bo->offset + offset_bytes; /* reloc */
 419    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 420               height << BRW_SURFACE_HEIGHT_SHIFT);
 421    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 422               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 423    surf[4] = 0;
 424    surf[5] = 0;
 425
 426    /* Emit relocation to surface contents. */
 427    drm_intel_bo_emit_reloc(brw->batch.bo,
 428                            *out_offset + 4,
 429                            bo, offset_bytes,
 430                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 431 }
 432
 433 /* Creates a new WM constant buffer reflecting the current fragment program's
 434  * constants, if needed by the fragment program.
 435  *
 436  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 437  * state atom.
 438  */
 439 static void
 440 brw_upload_wm_pull_constants(struct brw_context *brw)
 441 {
 442    struct gl_context *ctx = &brw->ctx;
 443    /* BRW_NEW_FRAGMENT_PROGRAM */
 444    struct brw_fragment_program *fp =
 445       (struct brw_fragment_program *) brw->fragment_program;
 446    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 447    const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
 448    const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
 449    float *constants;
 450    unsigned int i;
 451
 452    _mesa_load_state_parameters(ctx, params);
 453
 454    /* CACHE_NEW_WM_PROG */
 455    if (brw->wm.prog_data->nr_pull_params == 0) {
 456       if (brw->wm.const_bo) {
 457          drm_intel_bo_unreference(brw->wm.const_bo);
 458          brw->wm.const_bo = NULL;
 459          brw->wm.surf_offset[surf_index] = 0;
 460          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 461       }
 462       return;
 463    }
 464
 465    drm_intel_bo_unreference(brw->wm.const_bo);
 466    brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 467                                          size, 64);
 468
 469    /* _NEW_PROGRAM_CONSTANTS */
 470    drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
 471    constants = brw->wm.const_bo->virtual;
 472    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
 473       constants[i] = *brw->wm.prog_data->pull_param[i];
 474    }
 475    drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
 476
 477    brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
 478                                      &brw->wm.surf_offset[surf_index],
 479                                      true);
 480
 481    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 482 }
 483
 484 const struct brw_tracked_state brw_wm_pull_constants = {
 485    .dirty = {
 486       .mesa = (_NEW_PROGRAM_CONSTANTS),
 487       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 488       .cache = CACHE_NEW_WM_PROG,
 489    },
 490    .emit = brw_upload_wm_pull_constants,
 491 };
 492
 493 static void
 494 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 495 {
 496    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 497     * Notes):
 498     *
 499     *     A null surface will be used in instances where an actual surface is
 500     *     not bound. When a write message is generated to a null surface, no
 501     *     actual surface is written to. When a read message (including any
 502     *     sampling engine message) is generated to a null surface, the result
 503     *     is all zeros. Note that a null surface type is allowed to be used
 504     *     with all messages, even if it is not specificially indicated as
 505     *     supported. All of the remaining fields in surface state are ignored
 506     *     for null surfaces, with the following exceptions:
 507     *
 508     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 509     *       depth buffer’s corresponding state for all render target surfaces,
 510     *       including null.
 511     *
 512     *     - Surface Format must be R8G8B8A8_UNORM.
 513     */
 514    struct gl_context *ctx = &brw->ctx;
 515    uint32_t *surf;
 516    unsigned surface_type = BRW_SURFACE_NULL;
 517    drm_intel_bo *bo = NULL;
 518    unsigned pitch_minus_1 = 0;
 519    uint32_t multisampling_state = 0;
 520
 521    /* _NEW_BUFFERS */
 522    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 523
 524    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 525                           &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);
 526
 527    if (fb->Visual.samples > 1) {
 528       /* On Gen6, null render targets seem to cause GPU hangs when
 529        * multisampling.  So work around this problem by rendering into dummy
 530        * color buffer.
 531        *
 532        * To decrease the amount of memory needed by the workaround buffer, we
 533        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 534        * the amount of memory needed for the workaround buffer is
 535        * (width_in_tiles + height_in_tiles - 1) tiles.
 536        *
 537        * Note that since the workaround buffer will be interpreted by the
 538        * hardware as an interleaved multisampled buffer, we need to compute
 539        * width_in_tiles and height_in_tiles by dividing the width and height
 540        * by 16 rather than the normal Y-tile size of 32.
 541        */
 542       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 543       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 544       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 545       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 546                          size_needed);
 547       bo = brw->wm.multisampled_null_render_target_bo;
 548       surface_type = BRW_SURFACE_2D;
 549       pitch_minus_1 = 127;
 550       multisampling_state =
 551          brw_get_surface_num_multisamples(fb->Visual.samples);
 552    }
 553
 554    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 555               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 556    if (brw->gen < 6) {
 557       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 558                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 559                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 560                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 561    }
 562    surf[1] = bo ? bo->offset : 0;
 563    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 564               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 565
 566    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 567     * Notes):
 568     *
 569     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 570     */
 571    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 572               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 573    surf[4] = multisampling_state;
 574    surf[5] = 0;
 575
 576    if (bo) {
 577       drm_intel_bo_emit_reloc(brw->batch.bo,
 578                               brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
 579                               bo, 0,
 580                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 581    }
 582 }
 583
 584 /**
 585  * Sets up a surface state structure to point at the given region.
 586  * While it is only used for the front/back buffer currently, it should be
 587  * usable for further buffers when doing ARB_draw_buffer support.
 588  */
 589 static void
 590 brw_update_renderbuffer_surface(struct brw_context *brw,
 591                                 struct gl_renderbuffer *rb,
 592                                 bool layered,
 593                                 unsigned int unit)
 594 {
 595    struct gl_context *ctx = &brw->ctx;
 596    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 597    struct intel_mipmap_tree *mt = irb->mt;
 598    struct intel_region *region;
 599    uint32_t *surf;
 600    uint32_t tile_x, tile_y;
 601    uint32_t format = 0;
 602    /* _NEW_BUFFERS */
 603    gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 604
 605    assert(!layered);
 606
 607    if (rb->TexImage && !brw->has_surface_tile_offset) {
 608       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 609
 610       if (tile_x != 0 || tile_y != 0) {
 611          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 612           * destination in a miptree unless you actually setup your renderbuffer
 613           * as a miptree and used the fragile lod/array_index/etc. controls to
 614           * select the image.  So, instead, we just make a new single-level
 615           * miptree and render into that.
 616           */
 617          intel_renderbuffer_move_to_temp(brw, irb, false);
 618          mt = irb->mt;
 619       }
 620    }
 621
 622    intel_miptree_used_for_rendering(irb->mt);
 623
 624    region = irb->mt->region;
 625
 626    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 627                           &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);
 628
 629    format = brw->render_target_format[rb_format];
 630    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 631       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 632                     __FUNCTION__, _mesa_get_format_name(rb_format));
 633    }
 634
 635    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 636               format << BRW_SURFACE_FORMAT_SHIFT);
 637
 638    /* reloc */
 639    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 640               region->bo->offset);
 641
 642    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 643               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 644
 645    surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
 646               (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 647
 648    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 649
 650    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 651    /* Note that the low bits of these fields are missing, so
 652     * there's the possibility of getting in trouble.
 653     */
 654    assert(tile_x % 4 == 0);
 655    assert(tile_y % 2 == 0);
 656    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 657               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 658               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 659
 660    if (brw->gen < 6) {
 661       /* _NEW_COLOR */
 662       if (!ctx->Color.ColorLogicOpEnabled &&
 663           (ctx->Color.BlendEnabled & (1 << unit)))
 664          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 665
 666       if (!ctx->Color.ColorMask[unit][0])
 667          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 668       if (!ctx->Color.ColorMask[unit][1])
 669          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 670       if (!ctx->Color.ColorMask[unit][2])
 671          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 672
 673       /* As mentioned above, disable writes to the alpha component when the
 674        * renderbuffer is XRGB.
 675        */
 676       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 677           !ctx->Color.ColorMask[unit][3]) {
 678          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 679       }
 680    }
 681
 682    drm_intel_bo_emit_reloc(brw->batch.bo,
 683                            brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
 684                            region->bo,
 685                            surf[1] - region->bo->offset,
 686                            I915_GEM_DOMAIN_RENDER,
 687                            I915_GEM_DOMAIN_RENDER);
 688 }
 689
 690 /**
 691  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 692  */
 693 static void
 694 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 695 {
 696    struct gl_context *ctx = &brw->ctx;
 697    GLuint i;
 698
 699    /* _NEW_BUFFERS | _NEW_COLOR */
 700    /* Update surfaces for drawing buffers */
 701    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 702       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 703          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 704             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 705                                                   ctx->DrawBuffer->Layered, i);
 706          } else {
 707             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 708          }
 709       }
 710    } else {
 711       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 712    }
 713    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 714 }
 715
 716 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 717    .dirty = {
 718       .mesa = (_NEW_COLOR |
 719                _NEW_BUFFERS),
 720       .brw = BRW_NEW_BATCH,
 721       .cache = 0
 722    },
 723    .emit = brw_update_renderbuffer_surfaces,
 724 };
 725
 726 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 727    .dirty = {
 728       .mesa = _NEW_BUFFERS,
 729       .brw = BRW_NEW_BATCH,
 730       .cache = 0
 731    },
 732    .emit = brw_update_renderbuffer_surfaces,
 733 };
 734
 735
 736 static void
 737 update_stage_texture_surfaces(struct brw_context *brw,
 738                               const struct gl_program *prog,
 739                               uint32_t *surf_offset)
 740 {
 741    if (!prog)
 742       return;
 743
 744    struct gl_context *ctx = &brw->ctx;
 745
 746    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 747
 748    for (unsigned s = 0; s < num_samplers; s++) {
 749       surf_offset[s] = 0;
 750
 751       if (prog->SamplersUsed & (1 << s)) {
 752          const unsigned unit = prog->SamplerUnits[s];
 753
 754          /* _NEW_TEXTURE */
 755          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 756             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s);
 757          }
 758       }
 759    }
 760 }
 761
 762
 763 /**
 764  * Construct SURFACE_STATE objects for enabled textures.
 765  */
 766 static void
 767 brw_update_texture_surfaces(struct brw_context *brw)
 768 {
 769    /* BRW_NEW_VERTEX_PROGRAM */
 770    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 771
 772    /* BRW_NEW_GEOMETRY_PROGRAM */
 773    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 774
 775    /* BRW_NEW_FRAGMENT_PROGRAM */
 776    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 777
 778    /* _NEW_TEXTURE */
 779    update_stage_texture_surfaces(brw, vs,
 780                                  brw->vs.base.surf_offset +
 781                                  SURF_INDEX_VEC4_TEXTURE(0));
 782    update_stage_texture_surfaces(brw, gs,
 783                                  brw->gs.base.surf_offset +
 784                                  SURF_INDEX_VEC4_TEXTURE(0));
 785    update_stage_texture_surfaces(brw, fs,
 786                                  brw->wm.surf_offset +
 787                                  SURF_INDEX_TEXTURE(0));
 788
 789    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 790 }
 791
 792 const struct brw_tracked_state brw_texture_surfaces = {
 793    .dirty = {
 794       .mesa = _NEW_TEXTURE,
 795       .brw = BRW_NEW_BATCH |
 796              BRW_NEW_VERTEX_PROGRAM |
 797              BRW_NEW_GEOMETRY_PROGRAM |
 798              BRW_NEW_FRAGMENT_PROGRAM,
 799       .cache = 0
 800    },
 801    .emit = brw_update_texture_surfaces,
 802 };
 803
 804 void
 805 brw_upload_ubo_surfaces(struct brw_context *brw,
 806                         struct gl_shader *shader,
 807                         uint32_t *surf_offsets)
 808 {
 809    struct gl_context *ctx = &brw->ctx;
 810
 811    if (!shader)
 812       return;
 813
 814    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 815       struct gl_uniform_buffer_binding *binding;
 816       struct intel_buffer_object *intel_bo;
 817
 818       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 819       intel_bo = intel_buffer_object(binding->BufferObject);
 820       drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
 821
 822       /* Because behavior for referencing outside of the binding's size in the
 823        * glBindBufferRange case is undefined, we can just bind the whole buffer
 824        * glBindBufferBase wants and be a correct implementation.
 825        */
 826       brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
 827                                         bo->size - binding->Offset,
 828                                         &surf_offsets[i],
 829                                         shader->Type == GL_FRAGMENT_SHADER);
 830    }
 831
 832    if (shader->NumUniformBlocks)
 833       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 834 }
 835
 836 static void
 837 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 838 {
 839    struct gl_context *ctx = &brw->ctx;
 840    /* _NEW_PROGRAM */
 841    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 842
 843    if (!prog)
 844       return;
 845
 846    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 847                            &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
 848 }
 849
 850 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 851    .dirty = {
 852       .mesa = _NEW_PROGRAM,
 853       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 854       .cache = 0,
 855    },
 856    .emit = brw_upload_wm_ubo_surfaces,
 857 };
 858
 859 /**
 860  * Constructs the binding table for the WM surface state, which maps unit
 861  * numbers to surface state objects.
 862  */
 863 static void
 864 brw_upload_wm_binding_table(struct brw_context *brw)
 865 {
 866    uint32_t *bind;
 867    int i;
 868
 869    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
 870       gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
 871    }
 872
 873    /* CACHE_NEW_WM_PROG */
 874    unsigned entries = brw->wm.prog_data->binding_table_size;
 875    bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
 876                           sizeof(uint32_t) * entries,
 877                           32, &brw->wm.bind_bo_offset);
 878
 879    /* BRW_NEW_SURFACES */
 880    for (i = 0; i < entries; i++) {
 881       bind[i] = brw->wm.surf_offset[i];
 882    }
 883
 884    brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
 885 }
 886
 887 const struct brw_tracked_state brw_wm_binding_table = {
 888    .dirty = {
 889       .mesa = 0,
 890       .brw = (BRW_NEW_BATCH |
 891               BRW_NEW_SURFACES),
 892       .cache = CACHE_NEW_WM_PROG
 893    },
 894    .emit = brw_upload_wm_binding_table,
 895 };
 896
 897 void
 898 gen4_init_vtable_surface_functions(struct brw_context *brw)
 899 {
 900    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 901    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 902    brw->vtbl.update_null_renderbuffer_surface =
 903       brw_update_null_renderbuffer_surface;
 904    brw->vtbl.create_constant_surface = brw_create_constant_surface;
 905 }