src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193 static void
 194 gen4_emit_buffer_surface_state(struct brw_context *brw,
 195                                uint32_t *out_offset,
 196                                drm_intel_bo *bo,
 197                                unsigned buffer_offset,
 198                                unsigned surface_format,
 199                                unsigned buffer_size,
 200                                unsigned pitch,
 201                                unsigned mocs,
 202                                bool rw)
 203 {
 204    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 205                                     6 * 4, 32, out_offset);
 206    memset(surf, 0, 6 * 4);
 207
 208    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 209              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 210              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 211    surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
 212    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 213              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 214    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 215              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 216
 217    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 218     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 219     * physical cache.  It is mapped in hardware to the sampler cache."
 220     */
 221    if (bo) {
 222       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 223                               bo, buffer_offset,
 224                               I915_GEM_DOMAIN_SAMPLER,
 225                               (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
 226    }
 227 }
 228
 229 void
 230 brw_update_buffer_texture_surface(struct gl_context *ctx,
 231                                   unsigned unit,
 232                                   uint32_t *surf_offset)
 233 {
 234    struct brw_context *brw = brw_context(ctx);
 235    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 236    struct intel_buffer_object *intel_obj =
 237       intel_buffer_object(tObj->BufferObject);
 238    uint32_t size = tObj->BufferSize;
 239    drm_intel_bo *bo = NULL;
 240    gl_format format = tObj->_BufferObjectFormat;
 241    uint32_t brw_format = brw_format_for_mesa_format(format);
 242    int texel_size = _mesa_get_format_bytes(format);
 243
 244    if (intel_obj) {
 245       size = MIN2(size, intel_obj->Base.Size);
 246       bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
 247    }
 248
 249    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 250       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 251                     _mesa_get_format_name(format));
 252    }
 253
 254    brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
 255                                        tObj->BufferOffset,
 256                                        brw_format,
 257                                        size / texel_size,
 258                                        texel_size,
 259                                        0, /* mocs */
 260                                        false /* rw */);
 261 }
 262
 263 static void
 264 brw_update_texture_surface(struct gl_context *ctx,
 265                            unsigned unit,
 266                            uint32_t *surf_offset,
 267                            bool for_gather)
 268 {
 269    struct brw_context *brw = brw_context(ctx);
 270    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 271    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 272    struct intel_mipmap_tree *mt = intelObj->mt;
 273    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 274    uint32_t *surf;
 275
 276    /* BRW_NEW_UNIFORM_BUFFER */
 277    if (tObj->Target == GL_TEXTURE_BUFFER) {
 278       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 279       return;
 280    }
 281
 282    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 283                           6 * 4, 32, surf_offset);
 284
 285    (void) for_gather;   /* no w/a to apply for this gen */
 286
 287    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 288               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 289               BRW_SURFACE_CUBEFACE_ENABLES |
 290               (translate_tex_format(brw,
 291                                     mt->format,
 292                                     tObj->DepthMode,
 293                                     sampler->sRGBDecode) <<
 294                BRW_SURFACE_FORMAT_SHIFT));
 295
 296    surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
 297
 298    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 299               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 300               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 301
 302    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 303               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 304               (intelObj->mt->region->pitch - 1) <<
 305               BRW_SURFACE_PITCH_SHIFT);
 306
 307    surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
 308               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 309
 310    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 311
 312    /* Emit relocation to surface contents */
 313    drm_intel_bo_emit_reloc(brw->batch.bo,
 314                            *surf_offset + 4,
 315                            intelObj->mt->region->bo,
 316                            surf[1] - intelObj->mt->region->bo->offset,
 317                            I915_GEM_DOMAIN_SAMPLER, 0);
 318 }
 319
 320 /**
 321  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 322  * read from this buffer with Data Port Read instructions/messages.
 323  */
 324 void
 325 brw_create_constant_surface(struct brw_context *brw,
 326                             drm_intel_bo *bo,
 327                             uint32_t offset,
 328                             uint32_t size,
 329                             uint32_t *out_offset,
 330                             bool dword_pitch)
 331 {
 332    uint32_t stride = dword_pitch ? 4 : 16;
 333    uint32_t elements = ALIGN(size, stride) / stride;
 334
 335    brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
 336                                        BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 337                                        elements, stride, 0, false);
 338 }
 339
 340 /**
 341  * Set up a binding table entry for use by stream output logic (transform
 342  * feedback).
 343  *
 344  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 345  */
 346 void
 347 brw_update_sol_surface(struct brw_context *brw,
 348                        struct gl_buffer_object *buffer_obj,
 349                        uint32_t *out_offset, unsigned num_vector_components,
 350                        unsigned stride_dwords, unsigned offset_dwords)
 351 {
 352    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 353    uint32_t offset_bytes = 4 * offset_dwords;
 354    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
 355                                              offset_bytes,
 356                                              buffer_obj->Size - offset_bytes);
 357    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 358                                     out_offset);
 359    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 360    size_t size_dwords = buffer_obj->Size / 4;
 361    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 362
 363    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 364     * too big to map using a single binding table entry?
 365     */
 366    assert((size_dwords - offset_dwords) / stride_dwords
 367           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 368
 369    if (size_dwords > offset_dwords + num_vector_components) {
 370       /* There is room for at least 1 transform feedback output in the buffer.
 371        * Compute the number of additional transform feedback outputs the
 372        * buffer has room for.
 373        */
 374       buffer_size_minus_1 =
 375          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 376    } else {
 377       /* There isn't even room for a single transform feedback output in the
 378        * buffer.  We can't configure the binding table entry to prevent output
 379        * entirely; we'll have to rely on the geometry shader to detect
 380        * overflow.  But to minimize the damage in case of a bug, set up the
 381        * binding table entry to just allow a single output.
 382        */
 383       buffer_size_minus_1 = 0;
 384    }
 385    width = buffer_size_minus_1 & 0x7f;
 386    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 387    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 388
 389    switch (num_vector_components) {
 390    case 1:
 391       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 392       break;
 393    case 2:
 394       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 395       break;
 396    case 3:
 397       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 398       break;
 399    case 4:
 400       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 401       break;
 402    default:
 403       assert(!"Invalid vector size for transform feedback output");
 404       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 405       break;
 406    }
 407
 408    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 409       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 410       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 411       BRW_SURFACE_RC_READ_WRITE;
 412    surf[1] = bo->offset + offset_bytes; /* reloc */
 413    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 414               height << BRW_SURFACE_HEIGHT_SHIFT);
 415    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 416               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 417    surf[4] = 0;
 418    surf[5] = 0;
 419
 420    /* Emit relocation to surface contents. */
 421    drm_intel_bo_emit_reloc(brw->batch.bo,
 422                            *out_offset + 4,
 423                            bo, offset_bytes,
 424                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 425 }
 426
 427 /* Creates a new WM constant buffer reflecting the current fragment program's
 428  * constants, if needed by the fragment program.
 429  *
 430  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 431  * state atom.
 432  */
 433 static void
 434 brw_upload_wm_pull_constants(struct brw_context *brw)
 435 {
 436    struct gl_context *ctx = &brw->ctx;
 437    /* BRW_NEW_FRAGMENT_PROGRAM */
 438    struct brw_fragment_program *fp =
 439       (struct brw_fragment_program *) brw->fragment_program;
 440    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 441    const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
 442    const int surf_index =
 443       brw->wm.prog_data->base.binding_table.pull_constants_start;
 444    float *constants;
 445    unsigned int i;
 446
 447    _mesa_load_state_parameters(ctx, params);
 448
 449    /* CACHE_NEW_WM_PROG */
 450    if (brw->wm.prog_data->nr_pull_params == 0) {
 451       if (brw->wm.base.const_bo) {
 452          drm_intel_bo_unreference(brw->wm.base.const_bo);
 453          brw->wm.base.const_bo = NULL;
 454          brw->wm.base.surf_offset[surf_index] = 0;
 455          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 456       }
 457       return;
 458    }
 459
 460    drm_intel_bo_unreference(brw->wm.base.const_bo);
 461    brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 462                                          size, 64);
 463
 464    /* _NEW_PROGRAM_CONSTANTS */
 465    drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
 466    constants = brw->wm.base.const_bo->virtual;
 467    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
 468       constants[i] = *brw->wm.prog_data->pull_param[i];
 469    }
 470    drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
 471
 472    brw_create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
 473                                &brw->wm.base.surf_offset[surf_index],
 474                                true);
 475
 476    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 477 }
 478
 479 const struct brw_tracked_state brw_wm_pull_constants = {
 480    .dirty = {
 481       .mesa = (_NEW_PROGRAM_CONSTANTS),
 482       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 483       .cache = CACHE_NEW_WM_PROG,
 484    },
 485    .emit = brw_upload_wm_pull_constants,
 486 };
 487
 488 static void
 489 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 490 {
 491    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 492     * Notes):
 493     *
 494     *     A null surface will be used in instances where an actual surface is
 495     *     not bound. When a write message is generated to a null surface, no
 496     *     actual surface is written to. When a read message (including any
 497     *     sampling engine message) is generated to a null surface, the result
 498     *     is all zeros. Note that a null surface type is allowed to be used
 499     *     with all messages, even if it is not specificially indicated as
 500     *     supported. All of the remaining fields in surface state are ignored
 501     *     for null surfaces, with the following exceptions:
 502     *
 503     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 504     *       depth buffer’s corresponding state for all render target surfaces,
 505     *       including null.
 506     *
 507     *     - Surface Format must be R8G8B8A8_UNORM.
 508     */
 509    struct gl_context *ctx = &brw->ctx;
 510    uint32_t *surf;
 511    unsigned surface_type = BRW_SURFACE_NULL;
 512    drm_intel_bo *bo = NULL;
 513    unsigned pitch_minus_1 = 0;
 514    uint32_t multisampling_state = 0;
 515    uint32_t surf_index =
 516       brw->wm.prog_data->binding_table.render_target_start + unit;
 517
 518    /* _NEW_BUFFERS */
 519    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 520
 521    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 522                           &brw->wm.base.surf_offset[surf_index]);
 523
 524    if (fb->Visual.samples > 1) {
 525       /* On Gen6, null render targets seem to cause GPU hangs when
 526        * multisampling.  So work around this problem by rendering into dummy
 527        * color buffer.
 528        *
 529        * To decrease the amount of memory needed by the workaround buffer, we
 530        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 531        * the amount of memory needed for the workaround buffer is
 532        * (width_in_tiles + height_in_tiles - 1) tiles.
 533        *
 534        * Note that since the workaround buffer will be interpreted by the
 535        * hardware as an interleaved multisampled buffer, we need to compute
 536        * width_in_tiles and height_in_tiles by dividing the width and height
 537        * by 16 rather than the normal Y-tile size of 32.
 538        */
 539       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 540       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 541       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 542       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 543                          size_needed);
 544       bo = brw->wm.multisampled_null_render_target_bo;
 545       surface_type = BRW_SURFACE_2D;
 546       pitch_minus_1 = 127;
 547       multisampling_state =
 548          brw_get_surface_num_multisamples(fb->Visual.samples);
 549    }
 550
 551    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 552               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 553    if (brw->gen < 6) {
 554       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 555                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 556                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 557                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 558    }
 559    surf[1] = bo ? bo->offset : 0;
 560    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 561               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 562
 563    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 564     * Notes):
 565     *
 566     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 567     */
 568    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 569               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 570    surf[4] = multisampling_state;
 571    surf[5] = 0;
 572
 573    if (bo) {
 574       drm_intel_bo_emit_reloc(brw->batch.bo,
 575                               brw->wm.base.surf_offset[surf_index] + 4,
 576                               bo, 0,
 577                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 578    }
 579 }
 580
 581 /**
 582  * Sets up a surface state structure to point at the given region.
 583  * While it is only used for the front/back buffer currently, it should be
 584  * usable for further buffers when doing ARB_draw_buffer support.
 585  */
 586 static void
 587 brw_update_renderbuffer_surface(struct brw_context *brw,
 588                                 struct gl_renderbuffer *rb,
 589                                 bool layered,
 590                                 unsigned int unit)
 591 {
 592    struct gl_context *ctx = &brw->ctx;
 593    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 594    struct intel_mipmap_tree *mt = irb->mt;
 595    struct intel_region *region;
 596    uint32_t *surf;
 597    uint32_t tile_x, tile_y;
 598    uint32_t format = 0;
 599    /* _NEW_BUFFERS */
 600    gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 601    uint32_t surf_index =
 602       brw->wm.prog_data->binding_table.render_target_start + unit;
 603
 604    assert(!layered);
 605
 606    if (rb->TexImage && !brw->has_surface_tile_offset) {
 607       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 608
 609       if (tile_x != 0 || tile_y != 0) {
 610          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 611           * destination in a miptree unless you actually setup your renderbuffer
 612           * as a miptree and used the fragile lod/array_index/etc. controls to
 613           * select the image.  So, instead, we just make a new single-level
 614           * miptree and render into that.
 615           */
 616          intel_renderbuffer_move_to_temp(brw, irb, false);
 617          mt = irb->mt;
 618       }
 619    }
 620
 621    intel_miptree_used_for_rendering(irb->mt);
 622
 623    region = irb->mt->region;
 624
 625    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 626                           &brw->wm.base.surf_offset[surf_index]);
 627
 628    format = brw->render_target_format[rb_format];
 629    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 630       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 631                     __FUNCTION__, _mesa_get_format_name(rb_format));
 632    }
 633
 634    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 635               format << BRW_SURFACE_FORMAT_SHIFT);
 636
 637    /* reloc */
 638    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 639               region->bo->offset);
 640
 641    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 642               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 643
 644    surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
 645               (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 646
 647    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 648
 649    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 650    /* Note that the low bits of these fields are missing, so
 651     * there's the possibility of getting in trouble.
 652     */
 653    assert(tile_x % 4 == 0);
 654    assert(tile_y % 2 == 0);
 655    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 656               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 657               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 658
 659    if (brw->gen < 6) {
 660       /* _NEW_COLOR */
 661       if (!ctx->Color.ColorLogicOpEnabled &&
 662           (ctx->Color.BlendEnabled & (1 << unit)))
 663          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 664
 665       if (!ctx->Color.ColorMask[unit][0])
 666          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 667       if (!ctx->Color.ColorMask[unit][1])
 668          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 669       if (!ctx->Color.ColorMask[unit][2])
 670          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 671
 672       /* As mentioned above, disable writes to the alpha component when the
 673        * renderbuffer is XRGB.
 674        */
 675       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 676           !ctx->Color.ColorMask[unit][3]) {
 677          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 678       }
 679    }
 680
 681    drm_intel_bo_emit_reloc(brw->batch.bo,
 682                            brw->wm.base.surf_offset[surf_index] + 4,
 683                            region->bo,
 684                            surf[1] - region->bo->offset,
 685                            I915_GEM_DOMAIN_RENDER,
 686                            I915_GEM_DOMAIN_RENDER);
 687 }
 688
 689 /**
 690  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 691  */
 692 static void
 693 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 694 {
 695    struct gl_context *ctx = &brw->ctx;
 696    GLuint i;
 697
 698    /* _NEW_BUFFERS | _NEW_COLOR */
 699    /* Update surfaces for drawing buffers */
 700    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 701       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 702          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 703             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 704                                                   ctx->DrawBuffer->Layered, i);
 705          } else {
 706             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 707          }
 708       }
 709    } else {
 710       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 711    }
 712    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 713 }
 714
 715 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 716    .dirty = {
 717       .mesa = (_NEW_COLOR |
 718                _NEW_BUFFERS),
 719       .brw = BRW_NEW_BATCH,
 720       .cache = 0
 721    },
 722    .emit = brw_update_renderbuffer_surfaces,
 723 };
 724
 725 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 726    .dirty = {
 727       .mesa = _NEW_BUFFERS,
 728       .brw = BRW_NEW_BATCH,
 729       .cache = 0
 730    },
 731    .emit = brw_update_renderbuffer_surfaces,
 732 };
 733
 734
 735 static void
 736 update_stage_texture_surfaces(struct brw_context *brw,
 737                               const struct gl_program *prog,
 738                               struct brw_stage_state *stage_state,
 739                               bool for_gather)
 740 {
 741    if (!prog)
 742       return;
 743
 744    struct gl_context *ctx = &brw->ctx;
 745
 746    uint32_t *surf_offset = stage_state->surf_offset;
 747    if (for_gather)
 748       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
 749    else
 750       surf_offset += stage_state->prog_data->binding_table.texture_start;
 751
 752    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 753    for (unsigned s = 0; s < num_samplers; s++) {
 754       surf_offset[s] = 0;
 755
 756       if (prog->SamplersUsed & (1 << s)) {
 757          const unsigned unit = prog->SamplerUnits[s];
 758
 759          /* _NEW_TEXTURE */
 760          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 761             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 762          }
 763       }
 764    }
 765 }
 766
 767
 768 /**
 769  * Construct SURFACE_STATE objects for enabled textures.
 770  */
 771 static void
 772 brw_update_texture_surfaces(struct brw_context *brw)
 773 {
 774    /* BRW_NEW_VERTEX_PROGRAM */
 775    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 776
 777    /* BRW_NEW_GEOMETRY_PROGRAM */
 778    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 779
 780    /* BRW_NEW_FRAGMENT_PROGRAM */
 781    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 782
 783    /* _NEW_TEXTURE */
 784    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
 785    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
 786    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
 787
 788    /* emit alternate set of surface state for gather. this
 789     * allows the surface format to be overriden for only the
 790     * gather4 messages. */
 791    if (vs && vs->UsesGather)
 792       update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
 793    if (gs && gs->UsesGather)
 794       update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
 795    if (fs && fs->UsesGather)
 796       update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
 797
 798    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 799 }
 800
 801 const struct brw_tracked_state brw_texture_surfaces = {
 802    .dirty = {
 803       .mesa = _NEW_TEXTURE,
 804       .brw = BRW_NEW_BATCH |
 805              BRW_NEW_UNIFORM_BUFFER |
 806              BRW_NEW_VERTEX_PROGRAM |
 807              BRW_NEW_GEOMETRY_PROGRAM |
 808              BRW_NEW_FRAGMENT_PROGRAM,
 809       .cache = 0
 810    },
 811    .emit = brw_update_texture_surfaces,
 812 };
 813
 814 void
 815 brw_upload_ubo_surfaces(struct brw_context *brw,
 816                         struct gl_shader *shader,
 817                         struct brw_stage_state *stage_state,
 818                         struct brw_stage_prog_data *prog_data)
 819 {
 820    struct gl_context *ctx = &brw->ctx;
 821
 822    if (!shader)
 823       return;
 824
 825    uint32_t *surf_offsets =
 826       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 827
 828    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 829       struct gl_uniform_buffer_binding *binding;
 830       struct intel_buffer_object *intel_bo;
 831
 832       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 833       intel_bo = intel_buffer_object(binding->BufferObject);
 834       drm_intel_bo *bo =
 835          intel_bufferobj_buffer(brw, intel_bo,
 836                                 binding->Offset,
 837                                 binding->BufferObject->Size - binding->Offset);
 838
 839       /* Because behavior for referencing outside of the binding's size in the
 840        * glBindBufferRange case is undefined, we can just bind the whole buffer
 841        * glBindBufferBase wants and be a correct implementation.
 842        */
 843       brw_create_constant_surface(brw, bo, binding->Offset,
 844                                   bo->size - binding->Offset,
 845                                   &surf_offsets[i],
 846                                   shader->Type == GL_FRAGMENT_SHADER);
 847    }
 848
 849    if (shader->NumUniformBlocks)
 850       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 851 }
 852
 853 static void
 854 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 855 {
 856    struct gl_context *ctx = &brw->ctx;
 857    /* _NEW_PROGRAM */
 858    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 859
 860    if (!prog)
 861       return;
 862
 863    /* CACHE_NEW_WM_PROG */
 864    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 865                            &brw->wm.base, &brw->wm.prog_data->base);
 866 }
 867
 868 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 869    .dirty = {
 870       .mesa = _NEW_PROGRAM,
 871       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 872       .cache = CACHE_NEW_WM_PROG,
 873    },
 874    .emit = brw_upload_wm_ubo_surfaces,
 875 };
 876
 877 void
 878 brw_upload_abo_surfaces(struct brw_context *brw,
 879                         struct gl_shader_program *prog,
 880                         struct brw_stage_state *stage_state,
 881                         struct brw_stage_prog_data *prog_data)
 882 {
 883    struct gl_context *ctx = &brw->ctx;
 884    uint32_t *surf_offsets =
 885       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 886
 887    for (int i = 0; i < prog->NumAtomicBuffers; i++) {
 888       struct gl_atomic_buffer_binding *binding =
 889          &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
 890       struct intel_buffer_object *intel_bo =
 891          intel_buffer_object(binding->BufferObject);
 892       drm_intel_bo *bo = intel_bufferobj_buffer(
 893          brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
 894
 895       brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
 896                                    bo->size - binding->Offset,
 897                                    &surf_offsets[i], true);
 898    }
 899
 900    if (prog->NumUniformBlocks)
 901       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 902 }
 903
 904 static void
 905 brw_upload_wm_abo_surfaces(struct brw_context *brw)
 906 {
 907    struct gl_context *ctx = &brw->ctx;
 908    /* _NEW_PROGRAM */
 909    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 910
 911    if (prog) {
 912       /* CACHE_NEW_WM_PROG */
 913       brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
 914                               &brw->wm.prog_data->base);
 915    }
 916 }
 917
 918 const struct brw_tracked_state brw_wm_abo_surfaces = {
 919    .dirty = {
 920       .mesa = _NEW_PROGRAM,
 921       .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
 922       .cache = CACHE_NEW_WM_PROG,
 923    },
 924    .emit = brw_upload_wm_abo_surfaces,
 925 };
 926
 927 void
 928 gen4_init_vtable_surface_functions(struct brw_context *brw)
 929 {
 930    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 931    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 932    brw->vtbl.update_null_renderbuffer_surface =
 933       brw_update_null_renderbuffer_surface;
 934    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 935 }