src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       assert(0);
  77       return 0;
  78    }
  79 }
  80
  81 uint32_t
  82 brw_get_surface_tiling_bits(uint32_t tiling)
  83 {
  84    switch (tiling) {
  85    case I915_TILING_X:
  86       return BRW_SURFACE_TILED;
  87    case I915_TILING_Y:
  88       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  89    default:
  90       return 0;
  91    }
  92 }
  93
  94
  95 uint32_t
  96 brw_get_surface_num_multisamples(unsigned num_samples)
  97 {
  98    if (num_samples > 1)
  99       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 100    else
 101       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 102 }
 103
 104
 105 /**
 106  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 107  * swizzling.
 108  */
 109 int
 110 brw_get_texture_swizzle(const struct gl_context *ctx,
 111                         const struct gl_texture_object *t)
 112 {
 113    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 114
 115    int swizzles[SWIZZLE_NIL + 1] = {
 116       SWIZZLE_X,
 117       SWIZZLE_Y,
 118       SWIZZLE_Z,
 119       SWIZZLE_W,
 120       SWIZZLE_ZERO,
 121       SWIZZLE_ONE,
 122       SWIZZLE_NIL
 123    };
 124
 125    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 126        img->_BaseFormat == GL_DEPTH_STENCIL) {
 127       GLenum depth_mode = t->DepthMode;
 128
 129       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 130        * with depth component data specified with a sized internal format.
 131        * Otherwise, it's left at the old default, GL_LUMINANCE.
 132        */
 133       if (_mesa_is_gles3(ctx) &&
 134           img->InternalFormat != GL_DEPTH_COMPONENT &&
 135           img->InternalFormat != GL_DEPTH_STENCIL) {
 136          depth_mode = GL_RED;
 137       }
 138
 139       switch (depth_mode) {
 140       case GL_ALPHA:
 141          swizzles[0] = SWIZZLE_ZERO;
 142          swizzles[1] = SWIZZLE_ZERO;
 143          swizzles[2] = SWIZZLE_ZERO;
 144          swizzles[3] = SWIZZLE_X;
 145          break;
 146       case GL_LUMINANCE:
 147          swizzles[0] = SWIZZLE_X;
 148          swizzles[1] = SWIZZLE_X;
 149          swizzles[2] = SWIZZLE_X;
 150          swizzles[3] = SWIZZLE_ONE;
 151          break;
 152       case GL_INTENSITY:
 153          swizzles[0] = SWIZZLE_X;
 154          swizzles[1] = SWIZZLE_X;
 155          swizzles[2] = SWIZZLE_X;
 156          swizzles[3] = SWIZZLE_X;
 157          break;
 158       case GL_RED:
 159          swizzles[0] = SWIZZLE_X;
 160          swizzles[1] = SWIZZLE_ZERO;
 161          swizzles[2] = SWIZZLE_ZERO;
 162          swizzles[3] = SWIZZLE_ONE;
 163          break;
 164       }
 165    }
 166
 167    /* If the texture's format is alpha-only, force R, G, and B to
 168     * 0.0. Similarly, if the texture's format has no alpha channel,
 169     * force the alpha value read to 1.0. This allows for the
 170     * implementation to use an RGBA texture for any of these formats
 171     * without leaking any unexpected values.
 172     */
 173    switch (img->_BaseFormat) {
 174    case GL_ALPHA:
 175       swizzles[0] = SWIZZLE_ZERO;
 176       swizzles[1] = SWIZZLE_ZERO;
 177       swizzles[2] = SWIZZLE_ZERO;
 178       break;
 179    case GL_RED:
 180    case GL_RG:
 181    case GL_RGB:
 182       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 183          swizzles[3] = SWIZZLE_ONE;
 184       break;
 185    }
 186
 187    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 188                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 189                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 190                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 191 }
 192
 193 static void
 194 gen4_emit_buffer_surface_state(struct brw_context *brw,
 195                                uint32_t *out_offset,
 196                                drm_intel_bo *bo,
 197                                unsigned buffer_offset,
 198                                unsigned surface_format,
 199                                unsigned buffer_size,
 200                                unsigned pitch)
 201 {
 202    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 203                                     6 * 4, 32, out_offset);
 204    memset(surf, 0, 6 * 4);
 205
 206    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 207              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 208              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 209    surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
 210    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 211              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 212    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 213              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 214
 215    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 216     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 217     * physical cache.  It is mapped in hardware to the sampler cache."
 218     */
 219    if (bo) {
 220       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 221                               bo, buffer_offset,
 222                               I915_GEM_DOMAIN_SAMPLER, 0);
 223    }
 224 }
 225
 226 static void
 227 brw_update_buffer_texture_surface(struct gl_context *ctx,
 228                                   unsigned unit,
 229                                   uint32_t *surf_offset)
 230 {
 231    struct brw_context *brw = brw_context(ctx);
 232    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 233    struct intel_buffer_object *intel_obj =
 234       intel_buffer_object(tObj->BufferObject);
 235    drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
 236    gl_format format = tObj->_BufferObjectFormat;
 237    uint32_t brw_format = brw_format_for_mesa_format(format);
 238    int texel_size = _mesa_get_format_bytes(format);
 239    int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
 240
 241    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 242       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 243                     _mesa_get_format_name(format));
 244    }
 245
 246    gen4_emit_buffer_surface_state(brw, surf_offset, bo, 0,
 247                                   brw_format,
 248                                   w, texel_size);
 249 }
 250
 251 static void
 252 brw_update_texture_surface(struct gl_context *ctx,
 253                            unsigned unit,
 254                            uint32_t *surf_offset,
 255                            bool for_gather)
 256 {
 257    struct brw_context *brw = brw_context(ctx);
 258    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 259    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 260    struct intel_mipmap_tree *mt = intelObj->mt;
 261    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 262    uint32_t *surf;
 263
 264    if (tObj->Target == GL_TEXTURE_BUFFER) {
 265       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 266       return;
 267    }
 268
 269    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 270                           6 * 4, 32, surf_offset);
 271
 272    (void) for_gather;   /* no w/a to apply for this gen */
 273
 274    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 275               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 276               BRW_SURFACE_CUBEFACE_ENABLES |
 277               (translate_tex_format(brw,
 278                                     mt->format,
 279                                     tObj->DepthMode,
 280                                     sampler->sRGBDecode) <<
 281                BRW_SURFACE_FORMAT_SHIFT));
 282
 283    surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
 284
 285    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 286               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 287               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 288
 289    surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
 290               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 291               (intelObj->mt->region->pitch - 1) <<
 292               BRW_SURFACE_PITCH_SHIFT);
 293
 294    surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
 295               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 296
 297    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 298
 299    /* Emit relocation to surface contents */
 300    drm_intel_bo_emit_reloc(brw->batch.bo,
 301                            *surf_offset + 4,
 302                            intelObj->mt->region->bo,
 303                            surf[1] - intelObj->mt->region->bo->offset,
 304                            I915_GEM_DOMAIN_SAMPLER, 0);
 305 }
 306
 307 /**
 308  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 309  * read from this buffer with Data Port Read instructions/messages.
 310  */
 311 static void
 312 brw_create_constant_surface(struct brw_context *brw,
 313                             drm_intel_bo *bo,
 314                             uint32_t offset,
 315                             uint32_t size,
 316                             uint32_t *out_offset,
 317                             bool dword_pitch)
 318 {
 319    uint32_t stride = dword_pitch ? 4 : 16;
 320    uint32_t elements = ALIGN(size, stride) / stride;
 321
 322    gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
 323                                   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 324                                   elements, stride);
 325 }
 326
 327 /**
 328  * Set up a binding table entry for use by stream output logic (transform
 329  * feedback).
 330  *
 331  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 332  */
 333 void
 334 brw_update_sol_surface(struct brw_context *brw,
 335                        struct gl_buffer_object *buffer_obj,
 336                        uint32_t *out_offset, unsigned num_vector_components,
 337                        unsigned stride_dwords, unsigned offset_dwords)
 338 {
 339    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 340    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
 341    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 342                                     out_offset);
 343    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 344    uint32_t offset_bytes = 4 * offset_dwords;
 345    size_t size_dwords = buffer_obj->Size / 4;
 346    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 347
 348    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 349     * too big to map using a single binding table entry?
 350     */
 351    assert((size_dwords - offset_dwords) / stride_dwords
 352           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 353
 354    if (size_dwords > offset_dwords + num_vector_components) {
 355       /* There is room for at least 1 transform feedback output in the buffer.
 356        * Compute the number of additional transform feedback outputs the
 357        * buffer has room for.
 358        */
 359       buffer_size_minus_1 =
 360          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 361    } else {
 362       /* There isn't even room for a single transform feedback output in the
 363        * buffer.  We can't configure the binding table entry to prevent output
 364        * entirely; we'll have to rely on the geometry shader to detect
 365        * overflow.  But to minimize the damage in case of a bug, set up the
 366        * binding table entry to just allow a single output.
 367        */
 368       buffer_size_minus_1 = 0;
 369    }
 370    width = buffer_size_minus_1 & 0x7f;
 371    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 372    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 373
 374    switch (num_vector_components) {
 375    case 1:
 376       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 377       break;
 378    case 2:
 379       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 380       break;
 381    case 3:
 382       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 383       break;
 384    case 4:
 385       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 386       break;
 387    default:
 388       assert(!"Invalid vector size for transform feedback output");
 389       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 390       break;
 391    }
 392
 393    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 394       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 395       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 396       BRW_SURFACE_RC_READ_WRITE;
 397    surf[1] = bo->offset + offset_bytes; /* reloc */
 398    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 399               height << BRW_SURFACE_HEIGHT_SHIFT);
 400    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 401               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 402    surf[4] = 0;
 403    surf[5] = 0;
 404
 405    /* Emit relocation to surface contents. */
 406    drm_intel_bo_emit_reloc(brw->batch.bo,
 407                            *out_offset + 4,
 408                            bo, offset_bytes,
 409                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 410 }
 411
 412 /* Creates a new WM constant buffer reflecting the current fragment program's
 413  * constants, if needed by the fragment program.
 414  *
 415  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 416  * state atom.
 417  */
 418 static void
 419 brw_upload_wm_pull_constants(struct brw_context *brw)
 420 {
 421    struct gl_context *ctx = &brw->ctx;
 422    /* BRW_NEW_FRAGMENT_PROGRAM */
 423    struct brw_fragment_program *fp =
 424       (struct brw_fragment_program *) brw->fragment_program;
 425    struct gl_program_parameter_list *params = fp->program.Base.Parameters;
 426    const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
 427    const int surf_index =
 428       brw->wm.prog_data->base.binding_table.pull_constants_start;
 429    float *constants;
 430    unsigned int i;
 431
 432    _mesa_load_state_parameters(ctx, params);
 433
 434    /* CACHE_NEW_WM_PROG */
 435    if (brw->wm.prog_data->nr_pull_params == 0) {
 436       if (brw->wm.base.const_bo) {
 437          drm_intel_bo_unreference(brw->wm.base.const_bo);
 438          brw->wm.base.const_bo = NULL;
 439          brw->wm.base.surf_offset[surf_index] = 0;
 440          brw->state.dirty.brw |= BRW_NEW_SURFACES;
 441       }
 442       return;
 443    }
 444
 445    drm_intel_bo_unreference(brw->wm.base.const_bo);
 446    brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
 447                                          size, 64);
 448
 449    /* _NEW_PROGRAM_CONSTANTS */
 450    drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
 451    constants = brw->wm.base.const_bo->virtual;
 452    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
 453       constants[i] = *brw->wm.prog_data->pull_param[i];
 454    }
 455    drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
 456
 457    brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
 458                                      &brw->wm.base.surf_offset[surf_index],
 459                                      true);
 460
 461    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 462 }
 463
 464 const struct brw_tracked_state brw_wm_pull_constants = {
 465    .dirty = {
 466       .mesa = (_NEW_PROGRAM_CONSTANTS),
 467       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 468       .cache = CACHE_NEW_WM_PROG,
 469    },
 470    .emit = brw_upload_wm_pull_constants,
 471 };
 472
 473 static void
 474 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 475 {
 476    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 477     * Notes):
 478     *
 479     *     A null surface will be used in instances where an actual surface is
 480     *     not bound. When a write message is generated to a null surface, no
 481     *     actual surface is written to. When a read message (including any
 482     *     sampling engine message) is generated to a null surface, the result
 483     *     is all zeros. Note that a null surface type is allowed to be used
 484     *     with all messages, even if it is not specificially indicated as
 485     *     supported. All of the remaining fields in surface state are ignored
 486     *     for null surfaces, with the following exceptions:
 487     *
 488     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 489     *       depth buffer’s corresponding state for all render target surfaces,
 490     *       including null.
 491     *
 492     *     - Surface Format must be R8G8B8A8_UNORM.
 493     */
 494    struct gl_context *ctx = &brw->ctx;
 495    uint32_t *surf;
 496    unsigned surface_type = BRW_SURFACE_NULL;
 497    drm_intel_bo *bo = NULL;
 498    unsigned pitch_minus_1 = 0;
 499    uint32_t multisampling_state = 0;
 500    uint32_t surf_index =
 501       brw->wm.prog_data->binding_table.render_target_start + unit;
 502
 503    /* _NEW_BUFFERS */
 504    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 505
 506    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 507                           &brw->wm.base.surf_offset[surf_index]);
 508
 509    if (fb->Visual.samples > 1) {
 510       /* On Gen6, null render targets seem to cause GPU hangs when
 511        * multisampling.  So work around this problem by rendering into dummy
 512        * color buffer.
 513        *
 514        * To decrease the amount of memory needed by the workaround buffer, we
 515        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 516        * the amount of memory needed for the workaround buffer is
 517        * (width_in_tiles + height_in_tiles - 1) tiles.
 518        *
 519        * Note that since the workaround buffer will be interpreted by the
 520        * hardware as an interleaved multisampled buffer, we need to compute
 521        * width_in_tiles and height_in_tiles by dividing the width and height
 522        * by 16 rather than the normal Y-tile size of 32.
 523        */
 524       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 525       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 526       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 527       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 528                          size_needed);
 529       bo = brw->wm.multisampled_null_render_target_bo;
 530       surface_type = BRW_SURFACE_2D;
 531       pitch_minus_1 = 127;
 532       multisampling_state =
 533          brw_get_surface_num_multisamples(fb->Visual.samples);
 534    }
 535
 536    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 537               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 538    if (brw->gen < 6) {
 539       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 540                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 541                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 542                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 543    }
 544    surf[1] = bo ? bo->offset : 0;
 545    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 546               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 547
 548    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 549     * Notes):
 550     *
 551     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 552     */
 553    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 554               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 555    surf[4] = multisampling_state;
 556    surf[5] = 0;
 557
 558    if (bo) {
 559       drm_intel_bo_emit_reloc(brw->batch.bo,
 560                               brw->wm.base.surf_offset[surf_index] + 4,
 561                               bo, 0,
 562                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 563    }
 564 }
 565
 566 /**
 567  * Sets up a surface state structure to point at the given region.
 568  * While it is only used for the front/back buffer currently, it should be
 569  * usable for further buffers when doing ARB_draw_buffer support.
 570  */
 571 static void
 572 brw_update_renderbuffer_surface(struct brw_context *brw,
 573                                 struct gl_renderbuffer *rb,
 574                                 bool layered,
 575                                 unsigned int unit)
 576 {
 577    struct gl_context *ctx = &brw->ctx;
 578    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 579    struct intel_mipmap_tree *mt = irb->mt;
 580    struct intel_region *region;
 581    uint32_t *surf;
 582    uint32_t tile_x, tile_y;
 583    uint32_t format = 0;
 584    /* _NEW_BUFFERS */
 585    gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 586    uint32_t surf_index =
 587       brw->wm.prog_data->binding_table.render_target_start + unit;
 588
 589    assert(!layered);
 590
 591    if (rb->TexImage && !brw->has_surface_tile_offset) {
 592       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 593
 594       if (tile_x != 0 || tile_y != 0) {
 595          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 596           * destination in a miptree unless you actually setup your renderbuffer
 597           * as a miptree and used the fragile lod/array_index/etc. controls to
 598           * select the image.  So, instead, we just make a new single-level
 599           * miptree and render into that.
 600           */
 601          intel_renderbuffer_move_to_temp(brw, irb, false);
 602          mt = irb->mt;
 603       }
 604    }
 605
 606    intel_miptree_used_for_rendering(irb->mt);
 607
 608    region = irb->mt->region;
 609
 610    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 611                           &brw->wm.base.surf_offset[surf_index]);
 612
 613    format = brw->render_target_format[rb_format];
 614    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 615       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 616                     __FUNCTION__, _mesa_get_format_name(rb_format));
 617    }
 618
 619    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 620               format << BRW_SURFACE_FORMAT_SHIFT);
 621
 622    /* reloc */
 623    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 624               region->bo->offset);
 625
 626    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 627               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 628
 629    surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
 630               (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 631
 632    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 633
 634    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 635    /* Note that the low bits of these fields are missing, so
 636     * there's the possibility of getting in trouble.
 637     */
 638    assert(tile_x % 4 == 0);
 639    assert(tile_y % 2 == 0);
 640    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 641               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 642               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 643
 644    if (brw->gen < 6) {
 645       /* _NEW_COLOR */
 646       if (!ctx->Color.ColorLogicOpEnabled &&
 647           (ctx->Color.BlendEnabled & (1 << unit)))
 648          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 649
 650       if (!ctx->Color.ColorMask[unit][0])
 651          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 652       if (!ctx->Color.ColorMask[unit][1])
 653          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 654       if (!ctx->Color.ColorMask[unit][2])
 655          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 656
 657       /* As mentioned above, disable writes to the alpha component when the
 658        * renderbuffer is XRGB.
 659        */
 660       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 661           !ctx->Color.ColorMask[unit][3]) {
 662          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 663       }
 664    }
 665
 666    drm_intel_bo_emit_reloc(brw->batch.bo,
 667                            brw->wm.base.surf_offset[surf_index] + 4,
 668                            region->bo,
 669                            surf[1] - region->bo->offset,
 670                            I915_GEM_DOMAIN_RENDER,
 671                            I915_GEM_DOMAIN_RENDER);
 672 }
 673
 674 /**
 675  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 676  */
 677 static void
 678 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 679 {
 680    struct gl_context *ctx = &brw->ctx;
 681    GLuint i;
 682
 683    /* _NEW_BUFFERS | _NEW_COLOR */
 684    /* Update surfaces for drawing buffers */
 685    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 686       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 687          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 688             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 689                                                   ctx->DrawBuffer->Layered, i);
 690          } else {
 691             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 692          }
 693       }
 694    } else {
 695       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 696    }
 697    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 698 }
 699
 700 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 701    .dirty = {
 702       .mesa = (_NEW_COLOR |
 703                _NEW_BUFFERS),
 704       .brw = BRW_NEW_BATCH,
 705       .cache = 0
 706    },
 707    .emit = brw_update_renderbuffer_surfaces,
 708 };
 709
 710 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 711    .dirty = {
 712       .mesa = _NEW_BUFFERS,
 713       .brw = BRW_NEW_BATCH,
 714       .cache = 0
 715    },
 716    .emit = brw_update_renderbuffer_surfaces,
 717 };
 718
 719
 720 static void
 721 update_stage_texture_surfaces(struct brw_context *brw,
 722                               const struct gl_program *prog,
 723                               struct brw_stage_state *stage_state,
 724                               bool for_gather)
 725 {
 726    if (!prog)
 727       return;
 728
 729    struct gl_context *ctx = &brw->ctx;
 730
 731    uint32_t *surf_offset = stage_state->surf_offset;
 732    if (for_gather)
 733       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
 734    else
 735       surf_offset += stage_state->prog_data->binding_table.texture_start;
 736
 737    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 738    for (unsigned s = 0; s < num_samplers; s++) {
 739       surf_offset[s] = 0;
 740
 741       if (prog->SamplersUsed & (1 << s)) {
 742          const unsigned unit = prog->SamplerUnits[s];
 743
 744          /* _NEW_TEXTURE */
 745          if (ctx->Texture.Unit[unit]._ReallyEnabled) {
 746             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 747          }
 748       }
 749    }
 750 }
 751
 752
 753 /**
 754  * Construct SURFACE_STATE objects for enabled textures.
 755  */
 756 static void
 757 brw_update_texture_surfaces(struct brw_context *brw)
 758 {
 759    /* BRW_NEW_VERTEX_PROGRAM */
 760    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 761
 762    /* BRW_NEW_GEOMETRY_PROGRAM */
 763    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 764
 765    /* BRW_NEW_FRAGMENT_PROGRAM */
 766    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 767
 768    /* _NEW_TEXTURE */
 769    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
 770    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
 771    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
 772
 773    /* emit alternate set of surface state for gather. this
 774     * allows the surface format to be overriden for only the
 775     * gather4 messages. */
 776    if (vs && vs->UsesGather)
 777       update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
 778    if (gs && gs->UsesGather)
 779       update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
 780    if (fs && fs->UsesGather)
 781       update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
 782
 783    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 784 }
 785
 786 const struct brw_tracked_state brw_texture_surfaces = {
 787    .dirty = {
 788       .mesa = _NEW_TEXTURE,
 789       .brw = BRW_NEW_BATCH |
 790              BRW_NEW_VERTEX_PROGRAM |
 791              BRW_NEW_GEOMETRY_PROGRAM |
 792              BRW_NEW_FRAGMENT_PROGRAM,
 793       .cache = 0
 794    },
 795    .emit = brw_update_texture_surfaces,
 796 };
 797
 798 void
 799 brw_upload_ubo_surfaces(struct brw_context *brw,
 800                         struct gl_shader *shader,
 801                         struct brw_stage_state *stage_state,
 802                         struct brw_stage_prog_data *prog_data)
 803 {
 804    struct gl_context *ctx = &brw->ctx;
 805
 806    if (!shader)
 807       return;
 808
 809    uint32_t *surf_offsets =
 810       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 811
 812    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 813       struct gl_uniform_buffer_binding *binding;
 814       struct intel_buffer_object *intel_bo;
 815
 816       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 817       intel_bo = intel_buffer_object(binding->BufferObject);
 818       drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
 819
 820       /* Because behavior for referencing outside of the binding's size in the
 821        * glBindBufferRange case is undefined, we can just bind the whole buffer
 822        * glBindBufferBase wants and be a correct implementation.
 823        */
 824       brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
 825                                         bo->size - binding->Offset,
 826                                         &surf_offsets[i],
 827                                         shader->Type == GL_FRAGMENT_SHADER);
 828    }
 829
 830    if (shader->NumUniformBlocks)
 831       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 832 }
 833
 834 static void
 835 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 836 {
 837    struct gl_context *ctx = &brw->ctx;
 838    /* _NEW_PROGRAM */
 839    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 840
 841    if (!prog)
 842       return;
 843
 844    /* CACHE_NEW_WM_PROG */
 845    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 846                            &brw->wm.base, &brw->wm.prog_data->base);
 847 }
 848
 849 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 850    .dirty = {
 851       .mesa = _NEW_PROGRAM,
 852       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 853       .cache = CACHE_NEW_WM_PROG,
 854    },
 855    .emit = brw_upload_wm_ubo_surfaces,
 856 };
 857
 858 void
 859 gen4_init_vtable_surface_functions(struct brw_context *brw)
 860 {
 861    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 862    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 863    brw->vtbl.update_null_renderbuffer_surface =
 864       brw_update_null_renderbuffer_surface;
 865    brw->vtbl.create_constant_surface = brw_create_constant_surface;
 866 }