src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193 static void
 194 gen4_emit_buffer_surface_state(struct brw_context *brw,
 195                                uint32_t *out_offset,
 196                                drm_intel_bo *bo,
 197                                unsigned buffer_offset,
 198                                unsigned surface_format,
 199                                unsigned buffer_size,
 200                                unsigned pitch)
 201 {
 202    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 203                                     6 * 4, 32, out_offset);
 204    memset(surf, 0, 6 * 4);
 205
 206    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 207              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 208              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 209    surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
 210    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 211              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 212    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 213              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 214
 215    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 216     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 217     * physical cache.  It is mapped in hardware to the sampler cache."
 218     */
 219    if (bo) {
 220       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 221                               bo, buffer_offset,
 222                               I915_GEM_DOMAIN_SAMPLER, 0);
 223    }
 224 }
 225
 226 static void
 227 brw_update_buffer_texture_surface(struct gl_context *ctx,
 228                                   unsigned unit,
 229                                   uint32_t *surf_offset)
 230 {
 231    struct brw_context *brw = brw_context(ctx);
 232    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 233    struct intel_buffer_object *intel_obj =
 234       intel_buffer_object(tObj->BufferObject);
 235    drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
 236    gl_format format = tObj->_BufferObjectFormat;
 237    uint32_t brw_format = brw_format_for_mesa_format(format);
 238    int texel_size = _mesa_get_format_bytes(format);
 239    int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
 240
 241    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 242       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 243                     _mesa_get_format_name(format));
 244    }
 245
 246    gen4_emit_buffer_surface_state(brw, surf_offset, bo, 0,
 247                                   brw_format,
 248                                   w, texel_size);
 249 }
 250
 251 static void
 252 brw_update_texture_surface(struct gl_context *ctx,
 253                            unsigned unit,
 254                            uint32_t *surf_offset,
 255                            bool for_gather)
 256 {
 257    struct brw_context *brw = brw_context(ctx);
 258    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 259    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 260    struct intel_mipmap_tree *mt = intelObj->mt;
 261    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 262    uint32_t *surf;
 263
 264    if (tObj->Target == GL_TEXTURE_BUFFER) {
 265       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 266       return;
 267    }
 268
 269    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 270                           6 * 4, 32, surf_offset);
 271
 272    (void) for_gather;   /* no w/a to apply for this gen */
 273
 274    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 275               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 276               BRW_SURFACE_CUBEFACE_ENABLES |
 277               (translate_tex_format(brw,
 278                                     mt->format,
 279                                     tObj->DepthMode,
 280                                     sampler->sRGBDecode) <<
 281                BRW_SURFACE_FORMAT_SHIFT));
 282
 283    surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
 284
 285    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 286               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 287               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 288
 289    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 290               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 291               (intelObj->mt->region->pitch - 1) <<
 292               BRW_SURFACE_PITCH_SHIFT);
 293
 294    surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
 295               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 296
 297    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 298
 299    /* Emit relocation to surface contents */
 300    drm_intel_bo_emit_reloc(brw->batch.bo,
 301                            *surf_offset + 4,
 302                            intelObj->mt->region->bo,
 303                            surf[1] - intelObj->mt->region->bo->offset,
 304                            I915_GEM_DOMAIN_SAMPLER, 0);
 305 }
 306
 307 /**
 308  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 309  * read from this buffer with Data Port Read instructions/messages.
 310  */
 311 static void
 312 brw_create_constant_surface(struct brw_context *brw,
 313                             drm_intel_bo *bo,
 314                             uint32_t offset,
 315                             uint32_t size,
 316                             uint32_t *out_offset,
 317                             bool dword_pitch)
 318 {
 319    uint32_t stride = dword_pitch ? 4 : 16;
 320    uint32_t elements = ALIGN(size, stride) / stride;
 321
 322    gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
 323                                   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 324                                   elements, stride);
 325 }
 326
 327 /**
 328  * Set up a binding table entry for use by stream output logic (transform
 329  * feedback).
 330  *
 331  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 332  */
 333 void
 334 brw_update_sol_surface(struct brw_context *brw,
 335                        struct gl_buffer_object *buffer_obj,
 336                        uint32_t *out_offset, unsigned num_vector_components,
 337                        unsigned stride_dwords, unsigned offset_dwords)
 338 {
 339    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 340    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
 341    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 342                                     out_offset);
 343    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 344    uint32_t offset_bytes = 4 * offset_dwords;
 345    size_t size_dwords = buffer_obj->Size / 4;
 346    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 347
 348    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 349     * too big to map using a single binding table entry?
 350     */
 351    assert((size_dwords - offset_dwords) / stride_dwords
 352           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 353
 354    if (size_dwords > offset_dwords + num_vector_components) {
 355       /* There is room for at least 1 transform feedback output in the buffer.
 356        * Compute the number of additional transform feedback outputs the
 357        * buffer has room for.
 358        */
 359       buffer_size_minus_1 =
 360          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 361    } else {
 362       /* There isn't even room for a single transform feedback output in the
 363        * buffer.  We can't configure the binding table entry to prevent output
 364        * entirely; we'll have to rely on the geometry shader to detect
 365        * overflow.  But to minimize the damage in case of a bug, set up the
 366        * binding table entry to just allow a single output.
 367        */
 368       buffer_size_minus_1 = 0;
 369    }
 370    width = buffer_size_minus_1 & 0x7f;
 371    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 372    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 373
 374    switch (num_vector_components) {
 375    case 1:
 376       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 377       break;
 378    case 2:
 379       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 380       break;
 381    case 3:
 382       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 383       break;
 384    case 4:
 385       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 386       break;
 387    default:
 388       assert(!"Invalid vector size for transform feedback output");
 389       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 390       break;
 391    }
 392
 393    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 394       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 395       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 396       BRW_SURFACE_RC_READ_WRITE;
 397    surf[1] = bo->offset + offset_bytes; /* reloc */
 398    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 399               height << BRW_SURFACE_HEIGHT_SHIFT);
 400    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 401               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 402    surf[4] = 0;
 403    surf[5] = 0;
 404
 405    /* Emit relocation to surface contents. */
 406    drm_intel_bo_emit_reloc(brw->batch.bo,
 407                            *out_offset + 4,
 408                            bo, offset_bytes,
 409                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 410 }
 411
 412 /* Creates a new WM constant buffer reflecting the current fragment program's
 413  * constants, if needed by the fragment program.
 414  *
 415  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 416  * state atom.
 417  */
 418 static void
 419 brw_upload_wm_pull_constants(struct brw_context *brw)
 420 {
 421    struct gl_context *ctx = &brw->ctx;
 422    /* BRW_NEW_FRAGMENT_PROGRAM */
 423    struct brw_fragment_program *fp =
 424       (struct brw_fragment_program *) brw->fragment_program;
 425    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 426    const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
 427    const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
 428    float *constants;
 429    unsigned int i;
 430
 431    _mesa_load_state_parameters(ctx, params);
 432
 433    /* CACHE_NEW_WM_PROG */
 434    if (brw->wm.prog_data->nr_pull_params == 0) {
 435       if (brw->wm.base.const_bo) {
 436          drm_intel_bo_unreference(brw->wm.base.const_bo);
 437          brw->wm.base.const_bo = NULL;
 438          brw->wm.base.surf_offset[surf_index] = 0;
 439          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 440       }
 441       return;
 442    }
 443
 444    drm_intel_bo_unreference(brw->wm.base.const_bo);
 445    brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 446                                          size, 64);
 447
 448    /* _NEW_PROGRAM_CONSTANTS */
 449    drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
 450    constants = brw->wm.base.const_bo->virtual;
 451    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
 452       constants[i] = *brw->wm.prog_data->pull_param[i];
 453    }
 454    drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
 455
 456    brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
 457                                      &brw->wm.base.surf_offset[surf_index],
 458                                      true);
 459
 460    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 461 }
 462
 463 const struct brw_tracked_state brw_wm_pull_constants = {
 464    .dirty = {
 465       .mesa = (_NEW_PROGRAM_CONSTANTS),
 466       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 467       .cache = CACHE_NEW_WM_PROG,
 468    },
 469    .emit = brw_upload_wm_pull_constants,
 470 };
 471
 472 static void
 473 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 474 {
 475    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 476     * Notes):
 477     *
 478     *     A null surface will be used in instances where an actual surface is
 479     *     not bound. When a write message is generated to a null surface, no
 480     *     actual surface is written to. When a read message (including any
 481     *     sampling engine message) is generated to a null surface, the result
 482     *     is all zeros. Note that a null surface type is allowed to be used
 483     *     with all messages, even if it is not specificially indicated as
 484     *     supported. All of the remaining fields in surface state are ignored
 485     *     for null surfaces, with the following exceptions:
 486     *
 487     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 488     *       depth buffer’s corresponding state for all render target surfaces,
 489     *       including null.
 490     *
 491     *     - Surface Format must be R8G8B8A8_UNORM.
 492     */
 493    struct gl_context *ctx = &brw->ctx;
 494    uint32_t *surf;
 495    unsigned surface_type = BRW_SURFACE_NULL;
 496    drm_intel_bo *bo = NULL;
 497    unsigned pitch_minus_1 = 0;
 498    uint32_t multisampling_state = 0;
 499
 500    /* _NEW_BUFFERS */
 501    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 502
 503    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 504                           &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
 505
 506    if (fb->Visual.samples > 1) {
 507       /* On Gen6, null render targets seem to cause GPU hangs when
 508        * multisampling.  So work around this problem by rendering into dummy
 509        * color buffer.
 510        *
 511        * To decrease the amount of memory needed by the workaround buffer, we
 512        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 513        * the amount of memory needed for the workaround buffer is
 514        * (width_in_tiles + height_in_tiles - 1) tiles.
 515        *
 516        * Note that since the workaround buffer will be interpreted by the
 517        * hardware as an interleaved multisampled buffer, we need to compute
 518        * width_in_tiles and height_in_tiles by dividing the width and height
 519        * by 16 rather than the normal Y-tile size of 32.
 520        */
 521       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 522       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 523       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 524       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 525                          size_needed);
 526       bo = brw->wm.multisampled_null_render_target_bo;
 527       surface_type = BRW_SURFACE_2D;
 528       pitch_minus_1 = 127;
 529       multisampling_state =
 530          brw_get_surface_num_multisamples(fb->Visual.samples);
 531    }
 532
 533    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 534               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 535    if (brw->gen < 6) {
 536       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 537                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 538                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 539                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 540    }
 541    surf[1] = bo ? bo->offset : 0;
 542    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 543               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 544
 545    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 546     * Notes):
 547     *
 548     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 549     */
 550    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 551               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 552    surf[4] = multisampling_state;
 553    surf[5] = 0;
 554
 555    if (bo) {
 556       drm_intel_bo_emit_reloc(brw->batch.bo,
 557                               brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
 558                               bo, 0,
 559                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 560    }
 561 }
 562
 563 /**
 564  * Sets up a surface state structure to point at the given region.
 565  * While it is only used for the front/back buffer currently, it should be
 566  * usable for further buffers when doing ARB_draw_buffer support.
 567  */
 568 static void
 569 brw_update_renderbuffer_surface(struct brw_context *brw,
 570                                 struct gl_renderbuffer *rb,
 571                                 bool layered,
 572                                 unsigned int unit)
 573 {
 574    struct gl_context *ctx = &brw->ctx;
 575    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 576    struct intel_mipmap_tree *mt = irb->mt;
 577    struct intel_region *region;
 578    uint32_t *surf;
 579    uint32_t tile_x, tile_y;
 580    uint32_t format = 0;
 581    /* _NEW_BUFFERS */
 582    gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 583
 584    assert(!layered);
 585
 586    if (rb->TexImage && !brw->has_surface_tile_offset) {
 587       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 588
 589       if (tile_x != 0 || tile_y != 0) {
 590          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 591           * destination in a miptree unless you actually setup your renderbuffer
 592           * as a miptree and used the fragile lod/array_index/etc. controls to
 593           * select the image.  So, instead, we just make a new single-level
 594           * miptree and render into that.
 595           */
 596          intel_renderbuffer_move_to_temp(brw, irb, false);
 597          mt = irb->mt;
 598       }
 599    }
 600
 601    intel_miptree_used_for_rendering(irb->mt);
 602
 603    region = irb->mt->region;
 604
 605    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 606                           &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
 607
 608    format = brw->render_target_format[rb_format];
 609    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 610       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 611                     __FUNCTION__, _mesa_get_format_name(rb_format));
 612    }
 613
 614    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 615               format << BRW_SURFACE_FORMAT_SHIFT);
 616
 617    /* reloc */
 618    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 619               region->bo->offset);
 620
 621    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 622               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 623
 624    surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
 625               (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 626
 627    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 628
 629    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 630    /* Note that the low bits of these fields are missing, so
 631     * there's the possibility of getting in trouble.
 632     */
 633    assert(tile_x % 4 == 0);
 634    assert(tile_y % 2 == 0);
 635    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 636               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 637               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 638
 639    if (brw->gen < 6) {
 640       /* _NEW_COLOR */
 641       if (!ctx->Color.ColorLogicOpEnabled &&
 642           (ctx->Color.BlendEnabled & (1 << unit)))
 643          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 644
 645       if (!ctx->Color.ColorMask[unit][0])
 646          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 647       if (!ctx->Color.ColorMask[unit][1])
 648          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 649       if (!ctx->Color.ColorMask[unit][2])
 650          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 651
 652       /* As mentioned above, disable writes to the alpha component when the
 653        * renderbuffer is XRGB.
 654        */
 655       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 656           !ctx->Color.ColorMask[unit][3]) {
 657          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 658       }
 659    }
 660
 661    drm_intel_bo_emit_reloc(brw->batch.bo,
 662                            brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
 663                            region->bo,
 664                            surf[1] - region->bo->offset,
 665                            I915_GEM_DOMAIN_RENDER,
 666                            I915_GEM_DOMAIN_RENDER);
 667 }
 668
 669 /**
 670  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 671  */
 672 static void
 673 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 674 {
 675    struct gl_context *ctx = &brw->ctx;
 676    GLuint i;
 677
 678    /* _NEW_BUFFERS | _NEW_COLOR */
 679    /* Update surfaces for drawing buffers */
 680    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 681       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 682          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 683             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 684                                                   ctx->DrawBuffer->Layered, i);
 685          } else {
 686             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 687          }
 688       }
 689    } else {
 690       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 691    }
 692    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 693 }
 694
 695 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 696    .dirty = {
 697       .mesa = (_NEW_COLOR |
 698                _NEW_BUFFERS),
 699       .brw = BRW_NEW_BATCH,
 700       .cache = 0
 701    },
 702    .emit = brw_update_renderbuffer_surfaces,
 703 };
 704
 705 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 706    .dirty = {
 707       .mesa = _NEW_BUFFERS,
 708       .brw = BRW_NEW_BATCH,
 709       .cache = 0
 710    },
 711    .emit = brw_update_renderbuffer_surfaces,
 712 };
 713
 714
 715 static void
 716 update_stage_texture_surfaces(struct brw_context *brw,
 717                               const struct gl_program *prog,
 718                               uint32_t *surf_offset,
 719                               bool for_gather)
 720 {
 721    if (!prog)
 722       return;
 723
 724    struct gl_context *ctx = &brw->ctx;
 725
 726    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 727
 728    for (unsigned s = 0; s < num_samplers; s++) {
 729       surf_offset[s] = 0;
 730
 731       if (prog->SamplersUsed & (1 << s)) {
 732          const unsigned unit = prog->SamplerUnits[s];
 733
 734          /* _NEW_TEXTURE */
 735          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 736             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 737          }
 738       }
 739    }
 740 }
 741
 742
 743 /**
 744  * Construct SURFACE_STATE objects for enabled textures.
 745  */
 746 static void
 747 brw_update_texture_surfaces(struct brw_context *brw)
 748 {
 749    /* BRW_NEW_VERTEX_PROGRAM */
 750    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 751
 752    /* BRW_NEW_GEOMETRY_PROGRAM */
 753    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 754
 755    /* BRW_NEW_FRAGMENT_PROGRAM */
 756    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 757
 758    /* _NEW_TEXTURE */
 759    update_stage_texture_surfaces(brw, vs,
 760                                  brw->vs.base.surf_offset +
 761                                  SURF_INDEX_VEC4_TEXTURE(0),
 762                                  false);
 763    update_stage_texture_surfaces(brw, gs,
 764                                  brw->gs.base.surf_offset +
 765                                  SURF_INDEX_VEC4_TEXTURE(0),
 766                                  false);
 767    update_stage_texture_surfaces(brw, fs,
 768                                  brw->wm.base.surf_offset +
 769                                  SURF_INDEX_TEXTURE(0),
 770                                  false);
 771
 772    /* emit alternate set of surface state for gather. this
 773     * allows the surface format to be overriden for only the
 774     * gather4 messages. */
 775    if (vs && vs->UsesGather)
 776       update_stage_texture_surfaces(brw, vs,
 777                                     brw->vs.base.surf_offset +
 778                                     SURF_INDEX_VEC4_GATHER_TEXTURE(0),
 779                                     true);
 780    if (gs && gs->UsesGather)
 781       update_stage_texture_surfaces(brw, gs,
 782                                     brw->gs.base.surf_offset +
 783                                     SURF_INDEX_VEC4_GATHER_TEXTURE(0),
 784                                     true);
 785    if (fs && fs->UsesGather)
 786       update_stage_texture_surfaces(brw, fs,
 787                                     brw->wm.base.surf_offset +
 788                                     SURF_INDEX_GATHER_TEXTURE(0),
 789                                     true);
 790
 791    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 792 }
 793
 794 const struct brw_tracked_state brw_texture_surfaces = {
 795    .dirty = {
 796       .mesa = _NEW_TEXTURE,
 797       .brw = BRW_NEW_BATCH |
 798              BRW_NEW_VERTEX_PROGRAM |
 799              BRW_NEW_GEOMETRY_PROGRAM |
 800              BRW_NEW_FRAGMENT_PROGRAM,
 801       .cache = 0
 802    },
 803    .emit = brw_update_texture_surfaces,
 804 };
 805
 806 void
 807 brw_upload_ubo_surfaces(struct brw_context *brw,
 808                         struct gl_shader *shader,
 809                         uint32_t *surf_offsets)
 810 {
 811    struct gl_context *ctx = &brw->ctx;
 812
 813    if (!shader)
 814       return;
 815
 816    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 817       struct gl_uniform_buffer_binding *binding;
 818       struct intel_buffer_object *intel_bo;
 819
 820       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 821       intel_bo = intel_buffer_object(binding->BufferObject);
 822       drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
 823
 824       /* Because behavior for referencing outside of the binding's size in the
 825        * glBindBufferRange case is undefined, we can just bind the whole buffer
 826        * glBindBufferBase wants and be a correct implementation.
 827        */
 828       brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
 829                                         bo->size - binding->Offset,
 830                                         &surf_offsets[i],
 831                                         shader->Type == GL_FRAGMENT_SHADER);
 832    }
 833
 834    if (shader->NumUniformBlocks)
 835       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 836 }
 837
 838 static void
 839 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 840 {
 841    struct gl_context *ctx = &brw->ctx;
 842    /* _NEW_PROGRAM */
 843    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 844
 845    if (!prog)
 846       return;
 847
 848    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 849                            &brw->wm.base.surf_offset[SURF_INDEX_WM_UBO(0)]);
 850 }
 851
 852 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 853    .dirty = {
 854       .mesa = _NEW_PROGRAM,
 855       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 856       .cache = 0,
 857    },
 858    .emit = brw_upload_wm_ubo_surfaces,
 859 };
 860
 861 void
 862 gen4_init_vtable_surface_functions(struct brw_context *brw)
 863 {
 864    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 865    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 866    brw->vtbl.update_null_renderbuffer_surface =
 867       brw_update_null_renderbuffer_surface;
 868    brw->vtbl.create_constant_surface = brw_create_constant_surface;
 869 }