i965: Use unreachable() instead of unconditional assert().
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
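This commit converts always-failing assertions in default switch cases to Mesa's unreachable() macro, which documents that the case cannot happen and lets the compiler assume so. A minimal sketch of the pattern being applied (illustrative, not the actual diff hunks):

   /* Before: an assert that always fires if the default case is hit. */
   default:
      assert(!"not reached");
      return 0;

   /* After: unreachable() marks the case as impossible, so no dummy
    * return value is needed.
    */
   default:
      unreachable("not reached");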
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

void
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
                      bool is_render_target,
                      unsigned *width, unsigned *height,
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
{
   static const unsigned halign_stencil = 8;

   /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
    * there are half as many rows.
    * In addition, mip-levels are accessed manually by the program and
    * therefore the surface is set up to cover all the mip-levels for one
    * slice. (Hardware is still used to access individual slices).
    */
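   /* For example, a stencil miptree with a pitch of 128 bytes and 64 rows
    * per slice is programmed below as a Y-tiled surface with a 256-byte
    * pitch and 32 rows.
    */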
   *tiling = I915_TILING_Y;
   *pitch = mt->pitch * 2;
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
   *height = (mt->total_height / mt->physical_depth0) / 2;

   if (is_render_target) {
      *format = BRW_SURFACEFORMAT_R8_UINT;
   }
}


/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

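   /* Compose the depth-mode / base-format swizzle computed above with the
    * texture object's resolved swizzle state (which already folds in
    * EXT_texture_swizzle).
    */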
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               unsigned mocs,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
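   /* The buffer size is split across the surface state's width (bits 6:0),
    * height (bits 19:7) and depth (bits 26:20) fields.
    */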
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache. It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format,
                                       size / texel_size,
                                       texel_size,
                                       0, /* mocs */
                                       false /* rw */);
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_UNIFORM_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   uint32_t tex_format = translate_tex_format(brw, mt->format,
                                              sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value. For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

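   /* Set the vertical-alignment bit when the miptree was laid out with
    * align_h == 4 so the surface state matches the actual layout.
    */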
   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface. Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;

   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       elements, stride, 0, false);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
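   /* Split the entry count across the 7-bit width, 13-bit height and 7-bit
    * depth fields of the surface state.
    */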
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->base.nr_pull_params * sizeof(float);
   const int surf_index =
      brw->wm.prog_data->base.binding_table.pull_constants_start;
   unsigned int i;

   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->base.nr_pull_params == 0) {
      if (brw->wm.base.surf_offset[surf_index]) {
         brw->wm.base.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo *const_bo = NULL;
   uint32_t const_offset;
   float *constants = intel_upload_space(brw, size, 64,
                                         &const_bo, &const_offset);
   for (i = 0; i < brw->wm.prog_data->base.nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->base.pull_param[i];
   }

   brw_create_constant_surface(brw, const_bo, const_offset, size,
                               &brw->wm.base.surf_offset[surf_index],
                               true);
   drm_intel_bo_unreference(const_bo);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling. So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile). This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
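      /* For example, a 1920x1080 multisampled framebuffer needs
       * (120 + 68 - 1) * 4096 = 765,952 bytes for the workaround buffer.
       */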
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.base.surf_offset[surf_index] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image. So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB (no alpha bits) or when alpha writes are masked off.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.base.surf_offset[surf_index] + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw,
               ctx->DrawBuffer->_ColorDrawBuffers[i],
               ctx->DrawBuffer->MaxNumLayers > 0, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};


static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;

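   /* _mesa_fls() returns one past the index of the highest set bit, so the
    * loop below visits every sampler slot the program could reference.
    */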
   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s,
                                             for_gather);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);

   /* Emit an alternate set of surface state for gather. This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_UNIFORM_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_bo,
                                binding->Offset,
                                binding->BufferObject->Size - binding->Offset);

      /* Because behavior for referencing outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * that glBindBufferBase wants and still be a correct implementation.
       */
      brw_create_constant_surface(brw, bo, binding->Offset,
                                  bo->size - binding->Offset,
                                  &surf_offsets[i],
                                  shader->Stage == MESA_SHADER_FRAGMENT);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* CACHE_NEW_WM_PROG */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   for (int i = 0; i < prog->NumAtomicBuffers; i++) {
      struct gl_atomic_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
      struct intel_buffer_object *intel_bo =
         intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(
         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);

      brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
                                   bo->size - binding->Offset,
                                   &surf_offsets[i], true);
   }

   if (prog->NumAtomicBuffers)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (prog) {
      /* CACHE_NEW_WM_PROG */
      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
                              &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}