src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32
  33 #include "main/context.h"
  34 #include "main/blend.h"
  35 #include "main/mtypes.h"
  36 #include "main/samplerobj.h"
  37 #include "program/prog_parameter.h"
  38
  39 #include "intel_mipmap_tree.h"
  40 #include "intel_batchbuffer.h"
  41 #include "intel_tex.h"
  42 #include "intel_fbo.h"
  43 #include "intel_buffer_objects.h"
  44
  45 #include "brw_context.h"
  46 #include "brw_state.h"
  47 #include "brw_defines.h"
  48 #include "brw_wm.h"
  49
  50 GLuint
  51 translate_tex_target(GLenum target)
  52 {
  53    switch (target) {
  54    case GL_TEXTURE_1D:
  55    case GL_TEXTURE_1D_ARRAY_EXT:
  56       return BRW_SURFACE_1D;
  57
  58    case GL_TEXTURE_RECTANGLE_NV:
  59       return BRW_SURFACE_2D;
  60
  61    case GL_TEXTURE_2D:
  62    case GL_TEXTURE_2D_ARRAY_EXT:
  63    case GL_TEXTURE_EXTERNAL_OES:
  64    case GL_TEXTURE_2D_MULTISAMPLE:
  65    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
  66       return BRW_SURFACE_2D;
  67
  68    case GL_TEXTURE_3D:
  69       return BRW_SURFACE_3D;
  70
  71    case GL_TEXTURE_CUBE_MAP:
  72    case GL_TEXTURE_CUBE_MAP_ARRAY:
  73       return BRW_SURFACE_CUBE;
  74
  75    default:
  76       unreachable("not reached");
  77    }
  78 }
  79
  80 uint32_t
  81 brw_get_surface_tiling_bits(uint32_t tiling)
  82 {
  83    switch (tiling) {
  84    case I915_TILING_X:
  85       return BRW_SURFACE_TILED;
  86    case I915_TILING_Y:
  87       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  88    default:
  89       return 0;
  90    }
  91 }
  92
  93
  94 uint32_t
  95 brw_get_surface_num_multisamples(unsigned num_samples)
  96 {
  97    if (num_samples > 1)
  98       return BRW_SURFACE_MULTISAMPLECOUNT_4;
  99    else
 100       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 101 }
 102
 103 void
 104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
 105                       bool is_render_target,
 106                       unsigned *width, unsigned *height,
 107                       unsigned *pitch, uint32_t *tiling, unsigned *format)
 108 {
 109    static const unsigned halign_stencil = 8;
 110
 111    /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
 112     * there are half as many rows.
 113     * In addition, mip-levels are accessed manually by the program and
 114     * therefore the surface is setup to cover all the mip-levels for one slice.
 115     * (Hardware is still used to access individual slices).
 116     */
 117    *tiling = I915_TILING_Y;
 118    *pitch = mt->pitch * 2;
 119    *width = ALIGN(mt->total_width, halign_stencil) * 2;
 120    *height = (mt->total_height / mt->physical_depth0) / 2;
 121
 122    if (is_render_target) {
 123       *format = BRW_SURFACEFORMAT_R8_UINT;
 124    }
 125 }
 126
 127
 128 /**
 129  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 130  * swizzling.
 131  */
 132 int
 133 brw_get_texture_swizzle(const struct gl_context *ctx,
 134                         const struct gl_texture_object *t)
 135 {
 136    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 137
 138    int swizzles[SWIZZLE_NIL + 1] = {
 139       SWIZZLE_X,
 140       SWIZZLE_Y,
 141       SWIZZLE_Z,
 142       SWIZZLE_W,
 143       SWIZZLE_ZERO,
 144       SWIZZLE_ONE,
 145       SWIZZLE_NIL
 146    };
 147
 148    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 149        img->_BaseFormat == GL_DEPTH_STENCIL) {
 150       GLenum depth_mode = t->DepthMode;
 151
 152       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 153        * with depth component data specified with a sized internal format.
 154        * Otherwise, it's left at the old default, GL_LUMINANCE.
 155        */
 156       if (_mesa_is_gles3(ctx) &&
 157           img->InternalFormat != GL_DEPTH_COMPONENT &&
 158           img->InternalFormat != GL_DEPTH_STENCIL) {
 159          depth_mode = GL_RED;
 160       }
 161
 162       switch (depth_mode) {
 163       case GL_ALPHA:
 164          swizzles[0] = SWIZZLE_ZERO;
 165          swizzles[1] = SWIZZLE_ZERO;
 166          swizzles[2] = SWIZZLE_ZERO;
 167          swizzles[3] = SWIZZLE_X;
 168          break;
 169       case GL_LUMINANCE:
 170          swizzles[0] = SWIZZLE_X;
 171          swizzles[1] = SWIZZLE_X;
 172          swizzles[2] = SWIZZLE_X;
 173          swizzles[3] = SWIZZLE_ONE;
 174          break;
 175       case GL_INTENSITY:
 176          swizzles[0] = SWIZZLE_X;
 177          swizzles[1] = SWIZZLE_X;
 178          swizzles[2] = SWIZZLE_X;
 179          swizzles[3] = SWIZZLE_X;
 180          break;
 181       case GL_RED:
 182          swizzles[0] = SWIZZLE_X;
 183          swizzles[1] = SWIZZLE_ZERO;
 184          swizzles[2] = SWIZZLE_ZERO;
 185          swizzles[3] = SWIZZLE_ONE;
 186          break;
 187       }
 188    }
 189
 190    /* If the texture's format is alpha-only, force R, G, and B to
 191     * 0.0. Similarly, if the texture's format has no alpha channel,
 192     * force the alpha value read to 1.0. This allows for the
 193     * implementation to use an RGBA texture for any of these formats
 194     * without leaking any unexpected values.
 195     */
 196    switch (img->_BaseFormat) {
 197    case GL_ALPHA:
 198       swizzles[0] = SWIZZLE_ZERO;
 199       swizzles[1] = SWIZZLE_ZERO;
 200       swizzles[2] = SWIZZLE_ZERO;
 201       break;
 202    case GL_RED:
 203    case GL_RG:
 204    case GL_RGB:
 205       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
 206          swizzles[3] = SWIZZLE_ONE;
 207       break;
 208    }
 209
 210    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 211                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 212                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 213                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 214 }
 215
 216 static void
 217 gen4_emit_buffer_surface_state(struct brw_context *brw,
 218                                uint32_t *out_offset,
 219                                drm_intel_bo *bo,
 220                                unsigned buffer_offset,
 221                                unsigned surface_format,
 222                                unsigned buffer_size,
 223                                unsigned pitch,
 224                                unsigned mocs,
 225                                bool rw)
 226 {
 227    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 228                                     6 * 4, 32, out_offset);
 229    memset(surf, 0, 6 * 4);
 230
 231    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 232              surface_format << BRW_SURFACE_FORMAT_SHIFT |
 233              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
 234    surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
 235    surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
 236              ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
 237    surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
 238              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 239
 240    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
 241     * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
 242     * physical cache.  It is mapped in hardware to the sampler cache."
 243     */
 244    if (bo) {
 245       drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
 246                               bo, buffer_offset,
 247                               I915_GEM_DOMAIN_SAMPLER,
 248                               (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
 249    }
 250 }
 251
 252 void
 253 brw_update_buffer_texture_surface(struct gl_context *ctx,
 254                                   unsigned unit,
 255                                   uint32_t *surf_offset)
 256 {
 257    struct brw_context *brw = brw_context(ctx);
 258    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 259    struct intel_buffer_object *intel_obj =
 260       intel_buffer_object(tObj->BufferObject);
 261    uint32_t size = tObj->BufferSize;
 262    drm_intel_bo *bo = NULL;
 263    mesa_format format = tObj->_BufferObjectFormat;
 264    uint32_t brw_format = brw_format_for_mesa_format(format);
 265    int texel_size = _mesa_get_format_bytes(format);
 266
 267    if (intel_obj) {
 268       size = MIN2(size, intel_obj->Base.Size);
 269       bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
 270    }
 271
 272    if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
 273       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 274                     _mesa_get_format_name(format));
 275    }
 276
 277    brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
 278                                        tObj->BufferOffset,
 279                                        brw_format,
 280                                        size / texel_size,
 281                                        texel_size,
 282                                        0, /* mocs */
 283                                        false /* rw */);
 284 }
 285
 286 static void
 287 brw_update_texture_surface(struct gl_context *ctx,
 288                            unsigned unit,
 289                            uint32_t *surf_offset,
 290                            bool for_gather)
 291 {
 292    struct brw_context *brw = brw_context(ctx);
 293    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 294    struct intel_texture_object *intelObj = intel_texture_object(tObj);
 295    struct intel_mipmap_tree *mt = intelObj->mt;
 296    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 297    uint32_t *surf;
 298
 299    /* BRW_NEW_UNIFORM_BUFFER */
 300    if (tObj->Target == GL_TEXTURE_BUFFER) {
 301       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 302       return;
 303    }
 304
 305    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 306                           6 * 4, 32, surf_offset);
 307
 308    uint32_t tex_format = translate_tex_format(brw, mt->format,
 309                                               sampler->sRGBDecode);
 310
 311    if (for_gather) {
 312       /* Sandybridge's gather4 message is broken for integer formats.
 313        * To work around this, we pretend the surface is UNORM for
 314        * 8 or 16-bit formats, and emit shader instructions to recover
 315        * the real INT/UINT value.  For 32-bit formats, we pretend
 316        * the surface is FLOAT, and simply reinterpret the resulting
 317        * bits.
 318        */
 319       switch (tex_format) {
 320       case BRW_SURFACEFORMAT_R8_SINT:
 321       case BRW_SURFACEFORMAT_R8_UINT:
 322          tex_format = BRW_SURFACEFORMAT_R8_UNORM;
 323          break;
 324
 325       case BRW_SURFACEFORMAT_R16_SINT:
 326       case BRW_SURFACEFORMAT_R16_UINT:
 327          tex_format = BRW_SURFACEFORMAT_R16_UNORM;
 328          break;
 329
 330       case BRW_SURFACEFORMAT_R32_SINT:
 331       case BRW_SURFACEFORMAT_R32_UINT:
 332          tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
 333          break;
 334
 335       default:
 336          break;
 337       }
 338    }
 339
 340    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 341               BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 342               BRW_SURFACE_CUBEFACE_ENABLES |
 343               tex_format << BRW_SURFACE_FORMAT_SHIFT);
 344
 345    surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
 346
 347    surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
 348               (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
 349               (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 350
 351    surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
 352               (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
 353               (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 354
 355    surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
 356               SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
 357
 358    surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 359
 360    /* Emit relocation to surface contents */
 361    drm_intel_bo_emit_reloc(brw->batch.bo,
 362                            *surf_offset + 4,
 363                            mt->bo,
 364                            surf[1] - mt->bo->offset64,
 365                            I915_GEM_DOMAIN_SAMPLER, 0);
 366 }
 367
 368 /**
 369  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 370  * read from this buffer with Data Port Read instructions/messages.
 371  */
 372 void
 373 brw_create_constant_surface(struct brw_context *brw,
 374                             drm_intel_bo *bo,
 375                             uint32_t offset,
 376                             uint32_t size,
 377                             uint32_t *out_offset,
 378                             bool dword_pitch)
 379 {
 380    uint32_t stride = dword_pitch ? 4 : 16;
 381    uint32_t elements = ALIGN(size, stride) / stride;
 382
 383    brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
 384                                        BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
 385                                        elements, stride, 0, false);
 386 }
 387
 388 /**
 389  * Set up a binding table entry for use by stream output logic (transform
 390  * feedback).
 391  *
 392  * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
 393  */
 394 void
 395 brw_update_sol_surface(struct brw_context *brw,
 396                        struct gl_buffer_object *buffer_obj,
 397                        uint32_t *out_offset, unsigned num_vector_components,
 398                        unsigned stride_dwords, unsigned offset_dwords)
 399 {
 400    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 401    uint32_t offset_bytes = 4 * offset_dwords;
 402    drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
 403                                              offset_bytes,
 404                                              buffer_obj->Size - offset_bytes);
 405    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 406                                     out_offset);
 407    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 408    size_t size_dwords = buffer_obj->Size / 4;
 409    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 410
 411    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 412     * too big to map using a single binding table entry?
 413     */
 414    assert((size_dwords - offset_dwords) / stride_dwords
 415           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 416
 417    if (size_dwords > offset_dwords + num_vector_components) {
 418       /* There is room for at least 1 transform feedback output in the buffer.
 419        * Compute the number of additional transform feedback outputs the
 420        * buffer has room for.
 421        */
 422       buffer_size_minus_1 =
 423          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 424    } else {
 425       /* There isn't even room for a single transform feedback output in the
 426        * buffer.  We can't configure the binding table entry to prevent output
 427        * entirely; we'll have to rely on the geometry shader to detect
 428        * overflow.  But to minimize the damage in case of a bug, set up the
 429        * binding table entry to just allow a single output.
 430        */
 431       buffer_size_minus_1 = 0;
 432    }
 433    width = buffer_size_minus_1 & 0x7f;
 434    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 435    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 436
 437    switch (num_vector_components) {
 438    case 1:
 439       surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
 440       break;
 441    case 2:
 442       surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
 443       break;
 444    case 3:
 445       surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
 446       break;
 447    case 4:
 448       surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 449       break;
 450    default:
 451       unreachable("Invalid vector size for transform feedback output");
 452    }
 453
 454    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 455       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 456       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 457       BRW_SURFACE_RC_READ_WRITE;
 458    surf[1] = bo->offset64 + offset_bytes; /* reloc */
 459    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 460               height << BRW_SURFACE_HEIGHT_SHIFT);
 461    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 462               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 463    surf[4] = 0;
 464    surf[5] = 0;
 465
 466    /* Emit relocation to surface contents. */
 467    drm_intel_bo_emit_reloc(brw->batch.bo,
 468                            *out_offset + 4,
 469                            bo, offset_bytes,
 470                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 471 }
 472
 473 /* Creates a new WM constant buffer reflecting the current fragment program's
 474  * constants, if needed by the fragment program.
 475  *
 476  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 477  * state atom.
 478  */
 479 static void
 480 brw_upload_wm_pull_constants(struct brw_context *brw)
 481 {
 482    struct brw_stage_state *stage_state = &brw->wm.base;
 483    /* BRW_NEW_FRAGMENT_PROGRAM */
 484    struct brw_fragment_program *fp =
 485       (struct brw_fragment_program *) brw->fragment_program;
 486    /* CACHE_NEW_WM_PROG */
 487    struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
 488
 489    /* _NEW_PROGRAM_CONSTANTS */
 490    brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
 491                              stage_state, prog_data, true);
 492 }
 493
 494 const struct brw_tracked_state brw_wm_pull_constants = {
 495    .dirty = {
 496       .mesa = (_NEW_PROGRAM_CONSTANTS),
 497       .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
 498       .cache = CACHE_NEW_WM_PROG,
 499    },
 500    .emit = brw_upload_wm_pull_constants,
 501 };
 502
 503 /**
 504  * Creates a null renderbuffer surface.
 505  *
 506  * This is used when the shader doesn't write to any color output.  An FB
 507  * write to target 0 will still be emitted, because that's how the thread is
 508  * terminated (and computed depth is returned), so we need to have the
 509  * hardware discard the target 0 color output..
 510  */
 511 static void
 512 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
 513 {
 514    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 515     * Notes):
 516     *
 517     *     A null surface will be used in instances where an actual surface is
 518     *     not bound. When a write message is generated to a null surface, no
 519     *     actual surface is written to. When a read message (including any
 520     *     sampling engine message) is generated to a null surface, the result
 521     *     is all zeros. Note that a null surface type is allowed to be used
 522     *     with all messages, even if it is not specificially indicated as
 523     *     supported. All of the remaining fields in surface state are ignored
 524     *     for null surfaces, with the following exceptions:
 525     *
 526     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 527     *       depth buffer’s corresponding state for all render target surfaces,
 528     *       including null.
 529     *
 530     *     - Surface Format must be R8G8B8A8_UNORM.
 531     */
 532    struct gl_context *ctx = &brw->ctx;
 533    uint32_t *surf;
 534    unsigned surface_type = BRW_SURFACE_NULL;
 535    drm_intel_bo *bo = NULL;
 536    unsigned pitch_minus_1 = 0;
 537    uint32_t multisampling_state = 0;
 538    uint32_t surf_index =
 539       brw->wm.prog_data->binding_table.render_target_start + unit;
 540
 541    /* _NEW_BUFFERS */
 542    const struct gl_framebuffer *fb = ctx->DrawBuffer;
 543
 544    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 545                           &brw->wm.base.surf_offset[surf_index]);
 546
 547    if (fb->Visual.samples > 1) {
 548       /* On Gen6, null render targets seem to cause GPU hangs when
 549        * multisampling.  So work around this problem by rendering into dummy
 550        * color buffer.
 551        *
 552        * To decrease the amount of memory needed by the workaround buffer, we
 553        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 554        * the amount of memory needed for the workaround buffer is
 555        * (width_in_tiles + height_in_tiles - 1) tiles.
 556        *
 557        * Note that since the workaround buffer will be interpreted by the
 558        * hardware as an interleaved multisampled buffer, we need to compute
 559        * width_in_tiles and height_in_tiles by dividing the width and height
 560        * by 16 rather than the normal Y-tile size of 32.
 561        */
 562       unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
 563       unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
 564       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 565       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 566                          size_needed);
 567       bo = brw->wm.multisampled_null_render_target_bo;
 568       surface_type = BRW_SURFACE_2D;
 569       pitch_minus_1 = 127;
 570       multisampling_state =
 571          brw_get_surface_num_multisamples(fb->Visual.samples);
 572    }
 573
 574    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 575               BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 576    if (brw->gen < 6) {
 577       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 578                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 579                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 580                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 581    }
 582    surf[1] = bo ? bo->offset64 : 0;
 583    surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 584               (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 585
 586    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 587     * Notes):
 588     *
 589     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 590     */
 591    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 592               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 593    surf[4] = multisampling_state;
 594    surf[5] = 0;
 595
 596    if (bo) {
 597       drm_intel_bo_emit_reloc(brw->batch.bo,
 598                               brw->wm.base.surf_offset[surf_index] + 4,
 599                               bo, 0,
 600                               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 601    }
 602 }
 603
 604 /**
 605  * Sets up a surface state structure to point at the given region.
 606  * While it is only used for the front/back buffer currently, it should be
 607  * usable for further buffers when doing ARB_draw_buffer support.
 608  */
 609 static void
 610 brw_update_renderbuffer_surface(struct brw_context *brw,
 611                                 struct gl_renderbuffer *rb,
 612                                 bool layered,
 613                                 unsigned int unit)
 614 {
 615    struct gl_context *ctx = &brw->ctx;
 616    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 617    struct intel_mipmap_tree *mt = irb->mt;
 618    uint32_t *surf;
 619    uint32_t tile_x, tile_y;
 620    uint32_t format = 0;
 621    /* _NEW_BUFFERS */
 622    mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 623    uint32_t surf_index =
 624       brw->wm.prog_data->binding_table.render_target_start + unit;
 625
 626    assert(!layered);
 627
 628    if (rb->TexImage && !brw->has_surface_tile_offset) {
 629       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 630
 631       if (tile_x != 0 || tile_y != 0) {
 632          /* Original gen4 hardware couldn't draw to a non-tile-aligned
 633           * destination in a miptree unless you actually setup your renderbuffer
 634           * as a miptree and used the fragile lod/array_index/etc. controls to
 635           * select the image.  So, instead, we just make a new single-level
 636           * miptree and render into that.
 637           */
 638          intel_renderbuffer_move_to_temp(brw, irb, false);
 639          mt = irb->mt;
 640       }
 641    }
 642
 643    intel_miptree_used_for_rendering(irb->mt);
 644
 645    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
 646                           &brw->wm.base.surf_offset[surf_index]);
 647
 648    format = brw->render_target_format[rb_format];
 649    if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
 650       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 651                     __FUNCTION__, _mesa_get_format_name(rb_format));
 652    }
 653
 654    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
 655               format << BRW_SURFACE_FORMAT_SHIFT);
 656
 657    /* reloc */
 658    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
 659               mt->bo->offset64);
 660
 661    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 662               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 663
 664    surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
 665               (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 666
 667    surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 668
 669    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
 670    /* Note that the low bits of these fields are missing, so
 671     * there's the possibility of getting in trouble.
 672     */
 673    assert(tile_x % 4 == 0);
 674    assert(tile_y % 2 == 0);
 675    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
 676               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
 677               (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 678
 679    if (brw->gen < 6) {
 680       /* _NEW_COLOR */
 681       if (!ctx->Color.ColorLogicOpEnabled &&
 682           (ctx->Color.BlendEnabled & (1 << unit)))
 683          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 684
 685       if (!ctx->Color.ColorMask[unit][0])
 686          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
 687       if (!ctx->Color.ColorMask[unit][1])
 688          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
 689       if (!ctx->Color.ColorMask[unit][2])
 690          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 691
 692       /* As mentioned above, disable writes to the alpha component when the
 693        * renderbuffer is XRGB.
 694        */
 695       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
 696           !ctx->Color.ColorMask[unit][3]) {
 697          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
 698       }
 699    }
 700
 701    drm_intel_bo_emit_reloc(brw->batch.bo,
 702                            brw->wm.base.surf_offset[surf_index] + 4,
 703                            mt->bo,
 704                            surf[1] - mt->bo->offset64,
 705                            I915_GEM_DOMAIN_RENDER,
 706                            I915_GEM_DOMAIN_RENDER);
 707 }
 708
 709 /**
 710  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 711  */
 712 static void
 713 brw_update_renderbuffer_surfaces(struct brw_context *brw)
 714 {
 715    struct gl_context *ctx = &brw->ctx;
 716    GLuint i;
 717
 718    /* _NEW_BUFFERS | _NEW_COLOR */
 719    /* Update surfaces for drawing buffers */
 720    if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
 721       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 722          if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
 723             brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
 724                                                   ctx->DrawBuffer->MaxNumLayers > 0, i);
 725          } else {
 726             brw->vtbl.update_null_renderbuffer_surface(brw, i);
 727          }
 728       }
 729    } else {
 730       brw->vtbl.update_null_renderbuffer_surface(brw, 0);
 731    }
 732    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 733 }
 734
 735 const struct brw_tracked_state brw_renderbuffer_surfaces = {
 736    .dirty = {
 737       .mesa = (_NEW_COLOR |
 738                _NEW_BUFFERS),
 739       .brw = BRW_NEW_BATCH,
 740       .cache = 0
 741    },
 742    .emit = brw_update_renderbuffer_surfaces,
 743 };
 744
 745 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 746    .dirty = {
 747       .mesa = _NEW_BUFFERS,
 748       .brw = BRW_NEW_BATCH,
 749       .cache = 0
 750    },
 751    .emit = brw_update_renderbuffer_surfaces,
 752 };
 753
 754
 755 static void
 756 update_stage_texture_surfaces(struct brw_context *brw,
 757                               const struct gl_program *prog,
 758                               struct brw_stage_state *stage_state,
 759                               bool for_gather)
 760 {
 761    if (!prog)
 762       return;
 763
 764    struct gl_context *ctx = &brw->ctx;
 765
 766    uint32_t *surf_offset = stage_state->surf_offset;
 767    if (for_gather)
 768       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
 769    else
 770       surf_offset += stage_state->prog_data->binding_table.texture_start;
 771
 772    unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
 773    for (unsigned s = 0; s < num_samplers; s++) {
 774       surf_offset[s] = 0;
 775
 776       if (prog->SamplersUsed & (1 << s)) {
 777          const unsigned unit = prog->SamplerUnits[s];
 778
 779          /* _NEW_TEXTURE */
 780          if (ctx->Texture.Unit[unit]._Current) {
 781             brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
 782          }
 783       }
 784    }
 785 }
 786
 787
 788 /**
 789  * Construct SURFACE_STATE objects for enabled textures.
 790  */
 791 static void
 792 brw_update_texture_surfaces(struct brw_context *brw)
 793 {
 794    /* BRW_NEW_VERTEX_PROGRAM */
 795    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
 796
 797    /* BRW_NEW_GEOMETRY_PROGRAM */
 798    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
 799
 800    /* BRW_NEW_FRAGMENT_PROGRAM */
 801    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 802
 803    /* _NEW_TEXTURE */
 804    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
 805    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
 806    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
 807
 808    /* emit alternate set of surface state for gather. this
 809     * allows the surface format to be overriden for only the
 810     * gather4 messages. */
 811    if (brw->gen < 8) {
 812       if (vs && vs->UsesGather)
 813          update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
 814       if (gs && gs->UsesGather)
 815          update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
 816       if (fs && fs->UsesGather)
 817          update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
 818    }
 819
 820    brw->state.dirty.brw |= BRW_NEW_SURFACES;
 821 }
 822
 823 const struct brw_tracked_state brw_texture_surfaces = {
 824    .dirty = {
 825       .mesa = _NEW_TEXTURE,
 826       .brw = BRW_NEW_BATCH |
 827              BRW_NEW_UNIFORM_BUFFER |
 828              BRW_NEW_VERTEX_PROGRAM |
 829              BRW_NEW_GEOMETRY_PROGRAM |
 830              BRW_NEW_FRAGMENT_PROGRAM,
 831       .cache = 0
 832    },
 833    .emit = brw_update_texture_surfaces,
 834 };
 835
 836 void
 837 brw_upload_ubo_surfaces(struct brw_context *brw,
 838                         struct gl_shader *shader,
 839                         struct brw_stage_state *stage_state,
 840                         struct brw_stage_prog_data *prog_data)
 841 {
 842    struct gl_context *ctx = &brw->ctx;
 843
 844    if (!shader)
 845       return;
 846
 847    uint32_t *surf_offsets =
 848       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 849
 850    for (int i = 0; i < shader->NumUniformBlocks; i++) {
 851       struct gl_uniform_buffer_binding *binding;
 852       struct intel_buffer_object *intel_bo;
 853
 854       binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
 855       intel_bo = intel_buffer_object(binding->BufferObject);
 856       drm_intel_bo *bo =
 857          intel_bufferobj_buffer(brw, intel_bo,
 858                                 binding->Offset,
 859                                 binding->BufferObject->Size - binding->Offset);
 860
 861       /* Because behavior for referencing outside of the binding's size in the
 862        * glBindBufferRange case is undefined, we can just bind the whole buffer
 863        * glBindBufferBase wants and be a correct implementation.
 864        */
 865       brw_create_constant_surface(brw, bo, binding->Offset,
 866                                   bo->size - binding->Offset,
 867                                   &surf_offsets[i],
 868                                   shader->Stage == MESA_SHADER_FRAGMENT);
 869    }
 870
 871    if (shader->NumUniformBlocks)
 872       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 873 }
 874
 875 static void
 876 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 877 {
 878    struct gl_context *ctx = &brw->ctx;
 879    /* _NEW_PROGRAM */
 880    struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
 881
 882    if (!prog)
 883       return;
 884
 885    /* CACHE_NEW_WM_PROG */
 886    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
 887                            &brw->wm.base, &brw->wm.prog_data->base);
 888 }
 889
 890 const struct brw_tracked_state brw_wm_ubo_surfaces = {
 891    .dirty = {
 892       .mesa = _NEW_PROGRAM,
 893       .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
 894       .cache = CACHE_NEW_WM_PROG,
 895    },
 896    .emit = brw_upload_wm_ubo_surfaces,
 897 };
 898
 899 void
 900 brw_upload_abo_surfaces(struct brw_context *brw,
 901                         struct gl_shader_program *prog,
 902                         struct brw_stage_state *stage_state,
 903                         struct brw_stage_prog_data *prog_data)
 904 {
 905    struct gl_context *ctx = &brw->ctx;
 906    uint32_t *surf_offsets =
 907       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 908
 909    for (int i = 0; i < prog->NumAtomicBuffers; i++) {
 910       struct gl_atomic_buffer_binding *binding =
 911          &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
 912       struct intel_buffer_object *intel_bo =
 913          intel_buffer_object(binding->BufferObject);
 914       drm_intel_bo *bo = intel_bufferobj_buffer(
 915          brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
 916
 917       brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
 918                                    bo->size - binding->Offset,
 919                                    &surf_offsets[i], true);
 920    }
 921
 922    if (prog->NumUniformBlocks)
 923       brw->state.dirty.brw |= BRW_NEW_SURFACES;
 924 }
 925
 926 static void
 927 brw_upload_wm_abo_surfaces(struct brw_context *brw)
 928 {
 929    struct gl_context *ctx = &brw->ctx;
 930    /* _NEW_PROGRAM */
 931    struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 932
 933    if (prog) {
 934       /* CACHE_NEW_WM_PROG */
 935       brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
 936                               &brw->wm.prog_data->base);
 937    }
 938 }
 939
 940 const struct brw_tracked_state brw_wm_abo_surfaces = {
 941    .dirty = {
 942       .mesa = _NEW_PROGRAM,
 943       .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
 944       .cache = CACHE_NEW_WM_PROG,
 945    },
 946    .emit = brw_upload_wm_abo_surfaces,
 947 };
 948
 949 void
 950 gen4_init_vtable_surface_functions(struct brw_context *brw)
 951 {
 952    brw->vtbl.update_texture_surface = brw_update_texture_surface;
 953    brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
 954    brw->vtbl.update_null_renderbuffer_surface =
 955       brw_update_null_renderbuffer_surface;
 956    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 957 }