src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193 static void
 194 gen4_emit_buffer_surface_state(struct brw_context *brw,
 195                                uint32_t *out_offset,
 196                                drm_intel_bo *bo,
 197                                unsigned buffer_offset,
 198                                unsigned surface_format,
 199                                unsigned buffer_size,
 200                                unsigned pitch)
 201 {
 202    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 203                                     6 * 4, 32, out_offset);
 204    memset(surf, 0, 6 * 4);
 205
 206    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 207              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 208              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 209    surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
 210    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 211              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 212    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 213              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 214
 215    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 216     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 217     * physical cache.  It is mapped in hardware to the sampler cache."
 218     */
 219    if (bo) {
 220       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 221                               bo, buffer_offset,
 222                               I915_GEM_DOMAIN_SAMPLER, 0);
 223    }
 224 }
 225
 226 static void
 227 brw_update_buffer_texture_surface(struct gl_context *ctx,
 228                                   unsigned unit,
 229                                   uint32_t *surf_offset)
 230 {
 231    struct brw_context *brw = brw_context(ctx);
 232    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 233    struct intel_buffer_object *intel_obj =
 234       intel_buffer_object(tObj->BufferObject);
 235    drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
 236    gl_format format = tObj->_BufferObjectFormat;
 237    uint32_t brw_format = brw_format_for_mesa_format(format);
 238    int texel_size = _mesa_get_format_bytes(format);
 239    int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
 240
 241    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 242       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 243                     _mesa_get_format_name(format));
 244    }
 245
 246    gen4_emit_buffer_surface_state(brw, surf_offset, bo, 0,
 247                                   brw_format,
 248                                   w, texel_size);
 249 }
 250
 251 static void
 252 brw_update_texture_surface(struct gl_context *ctx,
 253                            unsigned unit,
 254                            uint32_t *surf_offset)
 255 {
 256    struct brw_context *brw = brw_context(ctx);
 257    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 258    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 259    struct intel_mipmap_tree *mt = intelObj->mt;
 260    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 261    uint32_t *surf;
 262
 263    if (tObj->Target == GL_TEXTURE_BUFFER) {
 264       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 265       return;
 266    }
 267
 268    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 269                           6 * 4, 32, surf_offset);
 270
 271    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 272               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 273               BRW_SURFACE_CUBEFACE_ENABLES |
 274               (translate_tex_format(brw,
 275                                     mt->format,
 276                                     tObj->DepthMode,
 277                                     sampler->sRGBDecode) <<
 278                BRW_SURFACE_FORMAT_SHIFT));
 279
 280    surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
 281
 282    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 283               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 284               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 285
 286    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 287               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 288               (intelObj->mt->region->pitch - 1) <<
 289               BRW_SURFACE_PITCH_SHIFT);
 290
 291    surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
 292               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 293
 294    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 295
 296    /* Emit relocation to surface contents */
 297    drm_intel_bo_emit_reloc(brw->batch.bo,
 298                            *surf_offset + 4,
 299                            intelObj->mt->region->bo,
 300                            surf[1] - intelObj->mt->region->bo->offset,
 301                            I915_GEM_DOMAIN_SAMPLER, 0);
 302 }
 303
 304 /**
 305  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 306  * read from this buffer with Data Port Read instructions/messages.
 307  */
 308 static void
 309 brw_create_constant_surface(struct brw_context *brw,
 310                             drm_intel_bo *bo,
 311                             uint32_t offset,
 312                             uint32_t size,
 313                             uint32_t *out_offset,
 314                             bool dword_pitch)
 315 {
 316    uint32_t stride = dword_pitch ? 4 : 16;
 317    uint32_t elements = ALIGN(size, stride) / stride;
 318
 319    gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
 320                                   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 321                                   elements, stride);
 322 }
 323
 324 /**
 325  * Set up a binding table entry for use by stream output logic (transform
 326  * feedback).
 327  *
 328  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 329  */
 330 void
 331 brw_update_sol_surface(struct brw_context *brw,
 332                        struct gl_buffer_object *buffer_obj,
 333                        uint32_t *out_offset, unsigned num_vector_components,
 334                        unsigned stride_dwords, unsigned offset_dwords)
 335 {
 336    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 337    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
 338    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 339                                     out_offset);
 340    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 341    uint32_t offset_bytes = 4 * offset_dwords;
 342    size_t size_dwords = buffer_obj->Size / 4;
 343    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 344
 345    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 346     * too big to map using a single binding table entry?
 347     */
 348    assert((size_dwords - offset_dwords) / stride_dwords
 349           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 350
 351    if (size_dwords > offset_dwords + num_vector_components) {
 352       /* There is room for at least 1 transform feedback output in the buffer.
 353        * Compute the number of additional transform feedback outputs the
 354        * buffer has room for.
 355        */
 356       buffer_size_minus_1 =
 357          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 358    } else {
 359       /* There isn't even room for a single transform feedback output in the
 360        * buffer.  We can't configure the binding table entry to prevent output
 361        * entirely; we'll have to rely on the geometry shader to detect
 362        * overflow.  But to minimize the damage in case of a bug, set up the
 363        * binding table entry to just allow a single output.
 364        */
 365       buffer_size_minus_1 = 0;
 366    }
 367    width = buffer_size_minus_1 & 0x7f;
 368    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 369    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 370
 371    switch (num_vector_components) {
 372    case 1:
 373       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 374       break;
 375    case 2:
 376       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 377       break;
 378    case 3:
 379       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 380       break;
 381    case 4:
 382       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 383       break;
 384    default:
 385       assert(!"Invalid vector size for transform feedback output");
 386       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 387       break;
 388    }
 389
 390    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 391       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 392       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 393       BRW_SURFACE_RC_READ_WRITE;
 394    surf[1] = bo->offset + offset_bytes; /* reloc */
 395    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 396               height << BRW_SURFACE_HEIGHT_SHIFT);
 397    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 398               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 399    surf[4] = 0;
 400    surf[5] = 0;
 401
 402    /* Emit relocation to surface contents. */
 403    drm_intel_bo_emit_reloc(brw->batch.bo,
 404                            *out_offset + 4,
 405                            bo, offset_bytes,
 406                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 407 }
 408
 409 /* Creates a new WM constant buffer reflecting the current fragment program's
 410  * constants, if needed by the fragment program.
 411  *
 412  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 413  * state atom.
 414  */
 415 static void
 416 brw_upload_wm_pull_constants(struct brw_context *brw)
 417 {
 418    struct gl_context *ctx = &brw->ctx;
 419    /* BRW_NEW_FRAGMENT_PROGRAM */
 420    struct brw_fragment_program *fp =
 421       (struct brw_fragment_program *) brw->fragment_program;
 422    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 423    const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
 424    const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
 425    float *constants;
 426    unsigned int i;
 427
 428    _mesa_load_state_parameters(ctx, params);
 429
 430    /* CACHE_NEW_WM_PROG */
 431    if (brw->wm.prog_data->nr_pull_params == 0) {
 432       if (brw->wm.base.const_bo) {
 433          drm_intel_bo_unreference(brw->wm.base.const_bo);
 434          brw->wm.base.const_bo = NULL;
 435          brw->wm.base.surf_offset[surf_index] = 0;
 436          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 437       }
 438       return;
 439    }
 440
 441    drm_intel_bo_unreference(brw->wm.base.const_bo);
 442    brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 443                                          size, 64);
 444
 445    /* _NEW_PROGRAM_CONSTANTS */
 446    drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
 447    constants = brw->wm.base.const_bo->virtual;
 448    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
 449       constants[i] = *brw->wm.prog_data->pull_param[i];
 450    }
 451    drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
 452
 453    brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
 454                                      &brw->wm.base.surf_offset[surf_index],
 455                                      true);
 456
 457    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 458 }
 459
/**
 * Tracked-state entry for the fragment shader pull constant buffer.
 *
 * Its emit hook is brw_upload_wm_pull_constants().  The dirty masks list
 * the Mesa, driver and program-cache flags the entry is registered against;
 * how those masks trigger emission is decided outside this file.
 */
const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};
 468
/**
 * Emit the SURFACE_STATE used for draw buffer slot `unit` when no
 * renderbuffer is bound to it.
 *
 * A BRW_SURFACE_NULL surface sized like the draw framebuffer is normally
 * emitted.  When the framebuffer visual is multisampled, a scratch bo is
 * bound as a 2D, Y-tiled render target instead (see the workaround comment
 * in the body) and a render-domain relocation is emitted for it.
 */
static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specificially indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    *
    * NOTE(review): the code below programs B8G8R8A8_UNORM rather than the
    * R8G8B8A8_UNORM named in the quote -- confirm this is intentional.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into dummy
       * color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      /* 4096 bytes per tile. */
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   /* Before gen6, all four channel write-disable bits are set here. */
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   /* Base address; only meaningful (and relocated) for the workaround bo. */
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}
 559
/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 *
 * The state is written to draw buffer slot `unit` and a render-domain
 * relocation is emitted for its base address.  Layered rendering is not
 * handled here: `layered` must be false.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         /* Re-read the miptree pointer after the move. */
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   /* Fetched only now, after the possible move to a temporary miptree. */
   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);

   format = brw->render_target_format[rb_format];
   /* An unsupported format is reported but the state is still emitted. */
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc: the call's return value is added to the bo offset, and it also
    * fills in tile_x/tile_y for dword 5 below.
    */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   /* Before gen6, blend enable and per-channel write masks are programmed
    * in the surface state itself.
    */
   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the application masked
       * them off, or when the draw framebuffer's visual has no alpha bits
       * (e.g. the renderbuffer is XRGB).
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   /* The reloc delta is whatever was added to the bo offset in dword 1. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}
 665
 666 /**
 667  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 668  */
 669 static void
 670 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 671 {
 672    struct gl_context *ctx = &brw->ctx;
 673    GLuint i;
 674
 675    /* _NEW_BUFFERS | _NEW_COLOR */
 676    /* Update surfaces for drawing buffers */
 677    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 678       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 679          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 680             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 681                                                   ctx->DrawBuffer->Layered, i);
 682          } else {
 683             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 684          }
 685       }
 686    } else {
 687       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 688    }
 689    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 690 }
 691
/**
 * Tracked-state entry for render target surfaces, registered against
 * _NEW_COLOR as well as _NEW_BUFFERS.  Its emit hook is
 * brw_update_renderbuffer_surfaces().
 */
const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};
 701
/**
 * Tracked-state entry with the same emit hook as brw_renderbuffer_surfaces
 * but without _NEW_COLOR in its Mesa dirty mask.  In this file the color
 * state is only read by the renderbuffer surface code when brw->gen < 6.
 */
const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};
 710
 711
 712 static void
 713 update_stage_texture_surfaces(struct brw_context *brw,
 714                               const struct gl_program *prog,
 715                               uint32_t *surf_offset)
 716 {
 717    if (!prog)
 718       return;
 719
 720    struct gl_context *ctx = &brw->ctx;
 721
 722    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 723
 724    for (unsigned s = 0; s < num_samplers; s++) {
 725       surf_offset[s] = 0;
 726
 727       if (prog->SamplersUsed & (1 << s)) {
 728          const unsigned unit = prog->SamplerUnits[s];
 729
 730          /* _NEW_TEXTURE */
 731          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 732             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s);
 733          }
 734       }
 735    }
 736 }
 737
 738
 739 /**
 740  * Construct SURFACE_STATE objects for enabled textures.
 741  */
 742 static void
 743 brw_update_texture_surfaces(struct brw_context *brw)
 744 {
 745    /* BRW_NEW_VERTEX_PROGRAM */
 746    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 747
 748    /* BRW_NEW_GEOMETRY_PROGRAM */
 749    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 750
 751    /* BRW_NEW_FRAGMENT_PROGRAM */
 752    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 753
 754    /* _NEW_TEXTURE */
 755    update_stage_texture_surfaces(brw, vs,
 756                                  brw->vs.base.surf_offset +
 757                                  SURF_INDEX_VEC4_TEXTURE(0));
 758    update_stage_texture_surfaces(brw, gs,
 759                                  brw->gs.base.surf_offset +
 760                                  SURF_INDEX_VEC4_TEXTURE(0));
 761    update_stage_texture_surfaces(brw, fs,
 762                                  brw->wm.base.surf_offset +
 763                                  SURF_INDEX_TEXTURE(0));
 764
 765    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 766 }
 767
/**
 * Tracked-state entry for texture surfaces.  It is registered against
 * _NEW_TEXTURE, BRW_NEW_BATCH and the per-stage program flags, and its emit
 * hook is brw_update_texture_surfaces().
 */
const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};
 779
 780 void
 781 brw_upload_ubo_surfaces(struct brw_context *brw,
 782                         struct gl_shader *shader,
 783                         uint32_t *surf_offsets)
 784 {
 785    struct gl_context *ctx = &brw->ctx;
 786
 787    if (!shader)
 788       return;
 789
 790    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 791       struct gl_uniform_buffer_binding *binding;
 792       struct intel_buffer_object *intel_bo;
 793
 794       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 795       intel_bo = intel_buffer_object(binding->BufferObject);
 796       drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
 797
 798       /* Because behavior for referencing outside of the binding's size in the
 799        * glBindBufferRange case is undefined, we can just bind the whole buffer
 800        * glBindBufferBase wants and be a correct implementation.
 801        */
 802       brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
 803                                         bo->size - binding->Offset,
 804                                         &surf_offsets[i],
 805                                         shader->Type == GL_FRAGMENT_SHADER);
 806    }
 807
 808    if (shader->NumUniformBlocks)
 809       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 810 }
 811
 812 static void
 813 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 814 {
 815    struct gl_context *ctx = &brw->ctx;
 816    /* _NEW_PROGRAM */
 817    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 818
 819    if (!prog)
 820       return;
 821
 822    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 823                            &brw->wm.base.surf_offset[SURF_INDEX_WM_UBO(0)]);
 824 }
 825
/**
 * Tracked-state entry for fragment shader uniform buffer surfaces.  It is
 * registered against _NEW_PROGRAM, BRW_NEW_BATCH and
 * BRW_NEW_UNIFORM_BUFFER, and its emit hook is brw_upload_wm_ubo_surfaces().
 */
const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = 0,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};
 834
 835 void
 836 gen4_init_vtable_surface_functions(struct brw_context *brw)
 837 {
 838    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 839    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 840    brw->vtbl.update_null_renderbuffer_surface =
 841       brw_update_null_renderbuffer_surface;
 842    brw->vtbl.create_constant_surface = brw_create_constant_surface;
 843 }