i965/state: Use ISL for emitting image surfaces
mesa.git: src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "main/teximage.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_instruction.h"
41 #include "main/framebuffer.h"
42
43 #include "isl/isl.h"
44
45 #include "intel_mipmap_tree.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_tex.h"
48 #include "intel_fbo.h"
49 #include "intel_buffer_objects.h"
50
51 #include "brw_context.h"
52 #include "brw_state.h"
53 #include "brw_defines.h"
54 #include "brw_wm.h"
55
56 struct surface_state_info {
57 unsigned num_dwords;
58 unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
59 unsigned reloc_dw;
60 unsigned aux_reloc_dw;
61 unsigned tex_mocs;
62 unsigned rb_mocs;
63 };
64
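/* Per-gen layout parameters for RENDER_SURFACE_STATE, indexed by brw->gen:
 * reloc_dw and aux_reloc_dw give the DWORD holding the surface and auxiliary
 * surface addresses (the relocation targets), and tex_mocs/rb_mocs are the
 * cacheability settings used for texturing and render targets respectively.
 */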
65 static const struct surface_state_info surface_state_infos[] = {
66 [4] = {6, 32, 1, 0},
67 [5] = {6, 32, 1, 0},
68 [6] = {6, 32, 1, 0},
69 [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
70 [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
71 [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
72 };
73
74 void
75 brw_emit_surface_state(struct brw_context *brw,
76 struct intel_mipmap_tree *mt,
77 const struct isl_view *view,
78 uint32_t mocs, bool for_gather,
79 uint32_t *surf_offset, int surf_index,
80 unsigned read_domains, unsigned write_domains)
81 {
82 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
83
84 struct isl_surf surf;
85 intel_miptree_get_isl_surf(brw, mt, &surf);
86
87 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
88
89 struct isl_surf *aux_surf = NULL, aux_surf_s;
90 uint64_t aux_offset = 0;
91 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
92 if (mt->mcs_mt &&
93 ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
94 mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
95 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
96 aux_surf = &aux_surf_s;
97 assert(mt->mcs_mt->offset == 0);
98 aux_offset = mt->mcs_mt->bo->offset64;
99
100 /* We only really need a clear color if we also have an auxiliary
101 * surface. Without one, it does nothing.
102 */
103 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
104 }
105
106 uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
107 ss_info.num_dwords * 4, ss_info.ss_align,
108 surf_index, surf_offset);
109
110 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
111 .address = mt->bo->offset64 + mt->offset,
112 .aux_surf = aux_surf, .aux_usage = aux_usage,
113 .aux_address = aux_offset,
114 .mocs = mocs, .clear_color = clear_color);
115
116 drm_intel_bo_emit_reloc(brw->batch.bo,
117 *surf_offset + 4 * ss_info.reloc_dw,
118 mt->bo, mt->offset,
119 read_domains, write_domains);
120
121 if (aux_surf) {
122 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
123 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
124 * contain other control information. Since buffer addresses are always
125 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
126 * an ordinary reloc to do the necessary address translation.
127 */
128 assert((aux_offset & 0xfff) == 0);
129 drm_intel_bo_emit_reloc(brw->batch.bo,
130 *surf_offset + 4 * ss_info.aux_reloc_dw,
131 mt->mcs_mt->bo, dw[ss_info.aux_reloc_dw] & 0xfff,
132 read_domains, write_domains);
133 }
134 }
135
136 GLuint
137 translate_tex_target(GLenum target)
138 {
139 switch (target) {
140 case GL_TEXTURE_1D:
141 case GL_TEXTURE_1D_ARRAY_EXT:
142 return BRW_SURFACE_1D;
143
144 case GL_TEXTURE_RECTANGLE_NV:
145 return BRW_SURFACE_2D;
146
147 case GL_TEXTURE_2D:
148 case GL_TEXTURE_2D_ARRAY_EXT:
149 case GL_TEXTURE_EXTERNAL_OES:
150 case GL_TEXTURE_2D_MULTISAMPLE:
151 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
152 return BRW_SURFACE_2D;
153
154 case GL_TEXTURE_3D:
155 return BRW_SURFACE_3D;
156
157 case GL_TEXTURE_CUBE_MAP:
158 case GL_TEXTURE_CUBE_MAP_ARRAY:
159 return BRW_SURFACE_CUBE;
160
161 default:
162 unreachable("not reached");
163 }
164 }
165
166 uint32_t
167 brw_get_surface_tiling_bits(uint32_t tiling)
168 {
169 switch (tiling) {
170 case I915_TILING_X:
171 return BRW_SURFACE_TILED;
172 case I915_TILING_Y:
173 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
174 default:
175 return 0;
176 }
177 }
178
179
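/* Map a sample count onto the SURFACE_STATE "Number of Multisamples"
 * encoding used by the pre-gen7 paths in this file. Anything above one
 * sample is programmed as 4x, the only multisampled mode these parts
 * support.
 */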
180 uint32_t
181 brw_get_surface_num_multisamples(unsigned num_samples)
182 {
183 if (num_samples > 1)
184 return BRW_SURFACE_MULTISAMPLECOUNT_4;
185 else
186 return BRW_SURFACE_MULTISAMPLECOUNT_1;
187 }
188
189 /**
190 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
191 * swizzling.
192 */
193 int
194 brw_get_texture_swizzle(const struct gl_context *ctx,
195 const struct gl_texture_object *t)
196 {
197 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
198
199 int swizzles[SWIZZLE_NIL + 1] = {
200 SWIZZLE_X,
201 SWIZZLE_Y,
202 SWIZZLE_Z,
203 SWIZZLE_W,
204 SWIZZLE_ZERO,
205 SWIZZLE_ONE,
206 SWIZZLE_NIL
207 };
208
209 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
210 img->_BaseFormat == GL_DEPTH_STENCIL) {
211 GLenum depth_mode = t->DepthMode;
212
213 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
214 * with depth component data specified with a sized internal format.
215 * Otherwise, it's left at the old default, GL_LUMINANCE.
216 */
217 if (_mesa_is_gles3(ctx) &&
218 img->InternalFormat != GL_DEPTH_COMPONENT &&
219 img->InternalFormat != GL_DEPTH_STENCIL) {
220 depth_mode = GL_RED;
221 }
222
223 switch (depth_mode) {
224 case GL_ALPHA:
225 swizzles[0] = SWIZZLE_ZERO;
226 swizzles[1] = SWIZZLE_ZERO;
227 swizzles[2] = SWIZZLE_ZERO;
228 swizzles[3] = SWIZZLE_X;
229 break;
230 case GL_LUMINANCE:
231 swizzles[0] = SWIZZLE_X;
232 swizzles[1] = SWIZZLE_X;
233 swizzles[2] = SWIZZLE_X;
234 swizzles[3] = SWIZZLE_ONE;
235 break;
236 case GL_INTENSITY:
237 swizzles[0] = SWIZZLE_X;
238 swizzles[1] = SWIZZLE_X;
239 swizzles[2] = SWIZZLE_X;
240 swizzles[3] = SWIZZLE_X;
241 break;
242 case GL_RED:
243 swizzles[0] = SWIZZLE_X;
244 swizzles[1] = SWIZZLE_ZERO;
245 swizzles[2] = SWIZZLE_ZERO;
246 swizzles[3] = SWIZZLE_ONE;
247 break;
248 }
249 }
250
251 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
252
253 /* If the texture's format is alpha-only, force R, G, and B to
254 * 0.0. Similarly, if the texture's format has no alpha channel,
255 * force the alpha value read to 1.0. This allows for the
256 * implementation to use an RGBA texture for any of these formats
257 * without leaking any unexpected values.
258 */
259 switch (img->_BaseFormat) {
260 case GL_ALPHA:
261 swizzles[0] = SWIZZLE_ZERO;
262 swizzles[1] = SWIZZLE_ZERO;
263 swizzles[2] = SWIZZLE_ZERO;
264 break;
265 case GL_LUMINANCE:
266 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
267 swizzles[0] = SWIZZLE_X;
268 swizzles[1] = SWIZZLE_X;
269 swizzles[2] = SWIZZLE_X;
270 swizzles[3] = SWIZZLE_ONE;
271 }
272 break;
273 case GL_LUMINANCE_ALPHA:
274 if (datatype == GL_SIGNED_NORMALIZED) {
275 swizzles[0] = SWIZZLE_X;
276 swizzles[1] = SWIZZLE_X;
277 swizzles[2] = SWIZZLE_X;
278 swizzles[3] = SWIZZLE_W;
279 }
280 break;
281 case GL_INTENSITY:
282 if (datatype == GL_SIGNED_NORMALIZED) {
283 swizzles[0] = SWIZZLE_X;
284 swizzles[1] = SWIZZLE_X;
285 swizzles[2] = SWIZZLE_X;
286 swizzles[3] = SWIZZLE_X;
287 }
288 break;
289 case GL_RED:
290 case GL_RG:
291 case GL_RGB:
292 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
293 swizzles[3] = SWIZZLE_ONE;
294 break;
295 }
296
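/* Compose the application-supplied swizzle (t->_Swizzle) with the
 * format-derived swizzles computed above: each component of the user
 * swizzle selects which computed value feeds that channel.
 */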
297 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
298 swizzles[GET_SWZ(t->_Swizzle, 1)],
299 swizzles[GET_SWZ(t->_Swizzle, 2)],
300 swizzles[GET_SWZ(t->_Swizzle, 3)]);
301 }
302
303 static void
304 gen4_emit_buffer_surface_state(struct brw_context *brw,
305 uint32_t *out_offset,
306 drm_intel_bo *bo,
307 unsigned buffer_offset,
308 unsigned surface_format,
309 unsigned buffer_size,
310 unsigned pitch,
311 bool rw)
312 {
313 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
314 6 * 4, 32, out_offset);
315 memset(surf, 0, 6 * 4);
316
317 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
318 surface_format << BRW_SURFACE_FORMAT_SHIFT |
319 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
320 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
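/* The buffer extent is encoded as (buffer_size - 1) split across the Width
 * (7 bits), Height (13 bits) and Depth (7 bits) fields. For example, a
 * hypothetical buffer_size of 1000 gives 999 = 0x3e7, i.e. Width = 0x67,
 * Height = 0x7, Depth = 0.
 */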
321 surf[2] = ((buffer_size - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
322 (((buffer_size - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
323 surf[3] = (((buffer_size - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
324 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
325
326 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
327 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
328 * physical cache. It is mapped in hardware to the sampler cache."
329 */
330 if (bo) {
331 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
332 bo, buffer_offset,
333 I915_GEM_DOMAIN_SAMPLER,
334 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
335 }
336 }
337
338 void
339 brw_update_buffer_texture_surface(struct gl_context *ctx,
340 unsigned unit,
341 uint32_t *surf_offset)
342 {
343 struct brw_context *brw = brw_context(ctx);
344 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
345 struct intel_buffer_object *intel_obj =
346 intel_buffer_object(tObj->BufferObject);
347 uint32_t size = tObj->BufferSize;
348 drm_intel_bo *bo = NULL;
349 mesa_format format = tObj->_BufferObjectFormat;
350 uint32_t brw_format = brw_format_for_mesa_format(format);
351 int texel_size = _mesa_get_format_bytes(format);
352
353 if (intel_obj) {
354 size = MIN2(size, intel_obj->Base.Size);
355 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
356 }
357
358 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
359 _mesa_problem(NULL, "bad format %s for texture buffer\n",
360 _mesa_get_format_name(format));
361 }
362
363 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
364 tObj->BufferOffset,
365 brw_format,
366 size / texel_size,
367 texel_size,
368 false /* rw */);
369 }
370
371 static void
372 brw_update_texture_surface(struct gl_context *ctx,
373 unsigned unit,
374 uint32_t *surf_offset,
375 bool for_gather,
376 uint32_t plane)
377 {
378 struct brw_context *brw = brw_context(ctx);
379 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
380 struct intel_texture_object *intelObj = intel_texture_object(tObj);
381 struct intel_mipmap_tree *mt = intelObj->mt;
382 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
383 uint32_t *surf;
384
385 /* BRW_NEW_TEXTURE_BUFFER */
386 if (tObj->Target == GL_TEXTURE_BUFFER) {
387 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
388 return;
389 }
390
391 if (plane > 0) {
392 if (mt->plane[plane - 1] == NULL)
393 return;
394 mt = mt->plane[plane - 1];
395 }
396
397 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
398 6 * 4, 32, surf_offset);
399
400 mesa_format mesa_fmt = plane == 0 ? intelObj->_Format : mt->format;
401 uint32_t tex_format = translate_tex_format(brw, mesa_fmt,
402 sampler->sRGBDecode);
403
404 if (for_gather) {
405 /* Sandybridge's gather4 message is broken for integer formats.
406 * To work around this, we pretend the surface is UNORM for
407 * 8 or 16-bit formats, and emit shader instructions to recover
408 * the real INT/UINT value. For 32-bit formats, we pretend
409 * the surface is FLOAT, and simply reinterpret the resulting
410 * bits.
411 */
412 switch (tex_format) {
413 case BRW_SURFACEFORMAT_R8_SINT:
414 case BRW_SURFACEFORMAT_R8_UINT:
415 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
416 break;
417
418 case BRW_SURFACEFORMAT_R16_SINT:
419 case BRW_SURFACEFORMAT_R16_UINT:
420 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
421 break;
422
423 case BRW_SURFACEFORMAT_R32_SINT:
424 case BRW_SURFACEFORMAT_R32_UINT:
425 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
426 break;
427
428 default:
429 break;
430 }
431 }
432
433 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
434 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
435 BRW_SURFACE_CUBEFACE_ENABLES |
436 tex_format << BRW_SURFACE_FORMAT_SHIFT);
437
438 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
439
440 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
441 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
442 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
443
444 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
445 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
446 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
447
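/* MinLevel and MinLayer are non-zero only for texture views, so for an
 * ordinary texture this reduces to BaseLevel relative to the miptree's
 * first level.
 */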
448 const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
449 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
450 SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
451 SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));
452
453 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
454
455 /* Emit relocation to surface contents */
456 drm_intel_bo_emit_reloc(brw->batch.bo,
457 *surf_offset + 4,
458 mt->bo,
459 surf[1] - mt->bo->offset64,
460 I915_GEM_DOMAIN_SAMPLER, 0);
461 }
462
463 /**
464 * Create the constant buffer surface. Vertex/fragment shader constants will be
465 * read from this buffer with Data Port Read instructions/messages.
466 */
467 void
468 brw_create_constant_surface(struct brw_context *brw,
469 drm_intel_bo *bo,
470 uint32_t offset,
471 uint32_t size,
472 uint32_t *out_offset)
473 {
474 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
475 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
476 size, 1, false);
477 }
478
479 /**
480 * Create the buffer surface. Shader buffer variables will be
481 * read from or written to this buffer with Data Port Read/Write
482 * instructions/messages.
483 */
484 void
485 brw_create_buffer_surface(struct brw_context *brw,
486 drm_intel_bo *bo,
487 uint32_t offset,
488 uint32_t size,
489 uint32_t *out_offset)
490 {
491 /* Use a raw surface so we can reuse existing untyped read/write/atomic
492 * messages. We need these specifically for the fragment shader since they
493 * include a pixel mask header that we need to ensure correct behavior
494 * with helper invocations, which cannot write to the buffer.
495 */
496 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
497 BRW_SURFACEFORMAT_RAW,
498 size, 1, true);
499 }
500
501 /**
502 * Set up a binding table entry for use by stream output logic (transform
503 * feedback).
504 *
505 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
506 */
507 void
508 brw_update_sol_surface(struct brw_context *brw,
509 struct gl_buffer_object *buffer_obj,
510 uint32_t *out_offset, unsigned num_vector_components,
511 unsigned stride_dwords, unsigned offset_dwords)
512 {
513 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
514 uint32_t offset_bytes = 4 * offset_dwords;
515 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
516 offset_bytes,
517 buffer_obj->Size - offset_bytes);
518 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
519 out_offset);
520 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
521 size_t size_dwords = buffer_obj->Size / 4;
522 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
523
524 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
525 * too big to map using a single binding table entry?
526 */
527 assert((size_dwords - offset_dwords) / stride_dwords
528 <= BRW_MAX_NUM_BUFFER_ENTRIES);
529
530 if (size_dwords > offset_dwords + num_vector_components) {
531 /* There is room for at least 1 transform feedback output in the buffer.
532 * Compute the number of additional transform feedback outputs the
533 * buffer has room for.
534 */
535 buffer_size_minus_1 =
536 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
537 } else {
538 /* There isn't even room for a single transform feedback output in the
539 * buffer. We can't configure the binding table entry to prevent output
540 * entirely; we'll have to rely on the geometry shader to detect
541 * overflow. But to minimize the damage in case of a bug, set up the
542 * binding table entry to just allow a single output.
543 */
544 buffer_size_minus_1 = 0;
545 }
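/* Split the entry count across the Width (7 bits), Height (13 bits) and
 * Depth (7 bits) fields of the surface state; together they hold a 27-bit
 * value, which is the limit the assertion above guards against.
 */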
546 width = buffer_size_minus_1 & 0x7f;
547 height = (buffer_size_minus_1 & 0xfff80) >> 7;
548 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
549
550 switch (num_vector_components) {
551 case 1:
552 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
553 break;
554 case 2:
555 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
556 break;
557 case 3:
558 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
559 break;
560 case 4:
561 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
562 break;
563 default:
564 unreachable("Invalid vector size for transform feedback output");
565 }
566
567 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
568 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
569 surface_format << BRW_SURFACE_FORMAT_SHIFT |
570 BRW_SURFACE_RC_READ_WRITE;
571 surf[1] = bo->offset64 + offset_bytes; /* reloc */
572 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
573 height << BRW_SURFACE_HEIGHT_SHIFT);
574 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
575 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
576 surf[4] = 0;
577 surf[5] = 0;
578
579 /* Emit relocation to surface contents. */
580 drm_intel_bo_emit_reloc(brw->batch.bo,
581 *out_offset + 4,
582 bo, offset_bytes,
583 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
584 }
585
586 /* Creates a new WM constant buffer reflecting the current fragment program's
587 * constants, if needed by the fragment program.
588 *
589 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
590 * state atom.
591 */
592 static void
593 brw_upload_wm_pull_constants(struct brw_context *brw)
594 {
595 struct brw_stage_state *stage_state = &brw->wm.base;
596 /* BRW_NEW_FRAGMENT_PROGRAM */
597 struct brw_fragment_program *fp =
598 (struct brw_fragment_program *) brw->fragment_program;
599 /* BRW_NEW_FS_PROG_DATA */
600 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
601
602 /* _NEW_PROGRAM_CONSTANTS */
603 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
604 stage_state, prog_data);
605 }
606
607 const struct brw_tracked_state brw_wm_pull_constants = {
608 .dirty = {
609 .mesa = _NEW_PROGRAM_CONSTANTS,
610 .brw = BRW_NEW_BATCH |
611 BRW_NEW_BLORP |
612 BRW_NEW_FRAGMENT_PROGRAM |
613 BRW_NEW_FS_PROG_DATA,
614 },
615 .emit = brw_upload_wm_pull_constants,
616 };
617
618 /**
619 * Creates a null renderbuffer surface.
620 *
621 * This is used when the shader doesn't write to any color output. An FB
622 * write to target 0 will still be emitted, because that's how the thread is
623 * terminated (and computed depth is returned), so we need to have the
624 * hardware discard the target 0 color output..
625 */
626 static void
627 brw_emit_null_surface_state(struct brw_context *brw,
628 unsigned width,
629 unsigned height,
630 unsigned samples,
631 uint32_t *out_offset)
632 {
633 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
634 * Notes):
635 *
636 * A null surface will be used in instances where an actual surface is
637 * not bound. When a write message is generated to a null surface, no
638 * actual surface is written to. When a read message (including any
639 * sampling engine message) is generated to a null surface, the result
640 * is all zeros. Note that a null surface type is allowed to be used
641 * with all messages, even if it is not specifically indicated as
642 * supported. All of the remaining fields in surface state are ignored
643 * for null surfaces, with the following exceptions:
644 *
645 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
646 * depth buffer’s corresponding state for all render target surfaces,
647 * including null.
648 *
649 * - Surface Format must be R8G8B8A8_UNORM.
650 */
651 unsigned surface_type = BRW_SURFACE_NULL;
652 drm_intel_bo *bo = NULL;
653 unsigned pitch_minus_1 = 0;
654 uint32_t multisampling_state = 0;
655 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
656 out_offset);
657
658 if (samples > 1) {
659 /* On Gen6, null render targets seem to cause GPU hangs when
660 * multisampling. So work around this problem by rendering into a dummy
661 * color buffer.
662 *
663 * To decrease the amount of memory needed by the workaround buffer, we
664 * set its pitch to 128 bytes (the width of a Y tile). This means that
665 * the amount of memory needed for the workaround buffer is
666 * (width_in_tiles + height_in_tiles - 1) tiles.
667 *
668 * Note that since the workaround buffer will be interpreted by the
669 * hardware as an interleaved multisampled buffer, we need to compute
670 * width_in_tiles and height_in_tiles by dividing the width and height
671 * by 16 rather than the normal Y-tile size of 32.
672 */
673 unsigned width_in_tiles = ALIGN(width, 16) / 16;
674 unsigned height_in_tiles = ALIGN(height, 16) / 16;
675 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
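/* For example, a hypothetical 1920x1080 target gives width_in_tiles = 120
 * and height_in_tiles = 68, so size_needed = 187 * 4096 bytes.
 */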
676 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
677 size_needed);
678 bo = brw->wm.multisampled_null_render_target_bo;
679 surface_type = BRW_SURFACE_2D;
680 pitch_minus_1 = 127;
681 multisampling_state = brw_get_surface_num_multisamples(samples);
682 }
683
684 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
685 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
686 if (brw->gen < 6) {
687 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
688 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
689 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
690 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
691 }
692 surf[1] = bo ? bo->offset64 : 0;
693 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
694 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
695
696 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
697 * Notes):
698 *
699 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
700 */
701 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
702 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
703 surf[4] = multisampling_state;
704 surf[5] = 0;
705
706 if (bo) {
707 drm_intel_bo_emit_reloc(brw->batch.bo,
708 *out_offset + 4,
709 bo, 0,
710 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
711 }
712 }
713
714 /**
715 * Sets up a surface state structure to point at the given region.
716 * While it is only used for the front/back buffer currently, it should be
717 * usable for further buffers when doing ARB_draw_buffers support.
718 */
719 static uint32_t
720 brw_update_renderbuffer_surface(struct brw_context *brw,
721 struct gl_renderbuffer *rb,
722 bool layered, unsigned unit,
723 uint32_t surf_index)
724 {
725 struct gl_context *ctx = &brw->ctx;
726 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
727 struct intel_mipmap_tree *mt = irb->mt;
728 uint32_t *surf;
729 uint32_t tile_x, tile_y;
730 uint32_t format = 0;
731 uint32_t offset;
732 /* _NEW_BUFFERS */
733 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
734 /* BRW_NEW_FS_PROG_DATA */
735
736 assert(!layered);
737
738 if (rb->TexImage && !brw->has_surface_tile_offset) {
739 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
740
741 if (tile_x != 0 || tile_y != 0) {
742 /* Original gen4 hardware couldn't draw to a non-tile-aligned
743 * destination in a miptree unless you actually set up your renderbuffer
744 * as a miptree and used the fragile lod/array_index/etc. controls to
745 * select the image. So, instead, we just make a new single-level
746 * miptree and render into that.
747 */
748 intel_renderbuffer_move_to_temp(brw, irb, false);
749 mt = irb->mt;
750 }
751 }
752
753 intel_miptree_used_for_rendering(irb->mt);
754
755 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
756
757 format = brw->render_target_format[rb_format];
758 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
759 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
760 __func__, _mesa_get_format_name(rb_format));
761 }
762
763 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
764 format << BRW_SURFACE_FORMAT_SHIFT);
765
766 /* reloc */
767 assert(mt->offset % mt->cpp == 0);
768 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
769 mt->bo->offset64 + mt->offset);
770
771 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
772 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
773
774 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
775 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
776
777 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
778
779 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
780 /* Note that the low bits of these fields are missing: X is stored in units
781 * of 4 pixels and Y in units of 2 rows, so misaligned tile offsets can't be expressed.
782 */
783 assert(tile_x % 4 == 0);
784 assert(tile_y % 2 == 0);
785 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
786 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
787 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
788
789 if (brw->gen < 6) {
790 /* _NEW_COLOR */
791 if (!ctx->Color.ColorLogicOpEnabled &&
792 (ctx->Color.BlendEnabled & (1 << unit)))
793 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
794
795 if (!ctx->Color.ColorMask[unit][0])
796 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
797 if (!ctx->Color.ColorMask[unit][1])
798 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
799 if (!ctx->Color.ColorMask[unit][2])
800 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
801
802 /* Also disable writes to the alpha component when the
803 * renderbuffer is XRGB.
804 */
805 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
806 !ctx->Color.ColorMask[unit][3]) {
807 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
808 }
809 }
810
811 drm_intel_bo_emit_reloc(brw->batch.bo,
812 offset + 4,
813 mt->bo,
814 surf[1] - mt->bo->offset64,
815 I915_GEM_DOMAIN_RENDER,
816 I915_GEM_DOMAIN_RENDER);
817
818 return offset;
819 }
820
821 /**
822 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
823 */
824 void
825 brw_update_renderbuffer_surfaces(struct brw_context *brw,
826 const struct gl_framebuffer *fb,
827 uint32_t render_target_start,
828 uint32_t *surf_offset)
829 {
830 GLuint i;
831 const unsigned int w = _mesa_geometric_width(fb);
832 const unsigned int h = _mesa_geometric_height(fb);
833 const unsigned int s = _mesa_geometric_samples(fb);
834
835 /* Update surfaces for drawing buffers */
836 if (fb->_NumColorDrawBuffers >= 1) {
837 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
838 const uint32_t surf_index = render_target_start + i;
839
840 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
841 surf_offset[surf_index] =
842 brw->vtbl.update_renderbuffer_surface(
843 brw, fb->_ColorDrawBuffers[i],
844 _mesa_geometric_layers(fb) > 0, i, surf_index);
845 } else {
846 brw->vtbl.emit_null_surface_state(brw, w, h, s,
847 &surf_offset[surf_index]);
848 }
849 }
850 } else {
851 const uint32_t surf_index = render_target_start;
852 brw->vtbl.emit_null_surface_state(brw, w, h, s,
853 &surf_offset[surf_index]);
854 }
855 }
856
857 static void
858 update_renderbuffer_surfaces(struct brw_context *brw)
859 {
860 const struct gl_context *ctx = &brw->ctx;
861
862 /* _NEW_BUFFERS | _NEW_COLOR */
863 const struct gl_framebuffer *fb = ctx->DrawBuffer;
864 brw_update_renderbuffer_surfaces(
865 brw, fb,
866 brw->wm.prog_data->binding_table.render_target_start,
867 brw->wm.base.surf_offset);
868 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
869 }
870
871 const struct brw_tracked_state brw_renderbuffer_surfaces = {
872 .dirty = {
873 .mesa = _NEW_BUFFERS |
874 _NEW_COLOR,
875 .brw = BRW_NEW_BATCH |
876 BRW_NEW_BLORP |
877 BRW_NEW_FS_PROG_DATA,
878 },
879 .emit = update_renderbuffer_surfaces,
880 };
881
882 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
883 .dirty = {
884 .mesa = _NEW_BUFFERS,
885 .brw = BRW_NEW_BATCH |
886 BRW_NEW_BLORP,
887 },
888 .emit = update_renderbuffer_surfaces,
889 };
890
891
892 static void
893 update_stage_texture_surfaces(struct brw_context *brw,
894 const struct gl_program *prog,
895 struct brw_stage_state *stage_state,
896 bool for_gather, uint32_t plane)
897 {
898 if (!prog)
899 return;
900
901 struct gl_context *ctx = &brw->ctx;
902
903 uint32_t *surf_offset = stage_state->surf_offset;
904
905 /* BRW_NEW_*_PROG_DATA */
906 if (for_gather)
907 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
908 else
909 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
910
911 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
912 for (unsigned s = 0; s < num_samplers; s++) {
913 surf_offset[s] = 0;
914
915 if (prog->SamplersUsed & (1 << s)) {
916 const unsigned unit = prog->SamplerUnits[s];
917
918 /* _NEW_TEXTURE */
919 if (ctx->Texture.Unit[unit]._Current) {
920 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
921 }
922 }
923 }
924 }
925
926
927 /**
928 * Construct SURFACE_STATE objects for enabled textures.
929 */
930 static void
931 brw_update_texture_surfaces(struct brw_context *brw)
932 {
933 /* BRW_NEW_VERTEX_PROGRAM */
934 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
935
936 /* BRW_NEW_TESS_PROGRAMS */
937 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
938 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
939
940 /* BRW_NEW_GEOMETRY_PROGRAM */
941 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
942
943 /* BRW_NEW_FRAGMENT_PROGRAM */
944 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
945
946 /* _NEW_TEXTURE */
947 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
948 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
949 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
950 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
951 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
952
953 /* Emit an alternate set of surface state for gather. This
954 * allows the surface format to be overridden for only the
955 * gather4 messages. */
956 if (brw->gen < 8) {
957 if (vs && vs->UsesGather)
958 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
959 if (tcs && tcs->UsesGather)
960 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
961 if (tes && tes->UsesGather)
962 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
963 if (gs && gs->UsesGather)
964 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
965 if (fs && fs->UsesGather)
966 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
967 }
968
969 if (fs) {
970 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
971 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
972 }
973
974 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
975 }
976
977 const struct brw_tracked_state brw_texture_surfaces = {
978 .dirty = {
979 .mesa = _NEW_TEXTURE,
980 .brw = BRW_NEW_BATCH |
981 BRW_NEW_BLORP |
982 BRW_NEW_FRAGMENT_PROGRAM |
983 BRW_NEW_FS_PROG_DATA |
984 BRW_NEW_GEOMETRY_PROGRAM |
985 BRW_NEW_GS_PROG_DATA |
986 BRW_NEW_TESS_PROGRAMS |
987 BRW_NEW_TCS_PROG_DATA |
988 BRW_NEW_TES_PROG_DATA |
989 BRW_NEW_TEXTURE_BUFFER |
990 BRW_NEW_VERTEX_PROGRAM |
991 BRW_NEW_VS_PROG_DATA,
992 },
993 .emit = brw_update_texture_surfaces,
994 };
995
996 static void
997 brw_update_cs_texture_surfaces(struct brw_context *brw)
998 {
999 /* BRW_NEW_COMPUTE_PROGRAM */
1000 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1001
1002 /* _NEW_TEXTURE */
1003 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1004
1005 /* Emit an alternate set of surface state for gather. This
1006 * allows the surface format to be overridden for only the
1007 * gather4 messages.
1008 */
1009 if (brw->gen < 8) {
1010 if (cs && cs->UsesGather)
1011 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1012 }
1013
1014 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1015 }
1016
1017 const struct brw_tracked_state brw_cs_texture_surfaces = {
1018 .dirty = {
1019 .mesa = _NEW_TEXTURE,
1020 .brw = BRW_NEW_BATCH |
1021 BRW_NEW_BLORP |
1022 BRW_NEW_COMPUTE_PROGRAM,
1023 },
1024 .emit = brw_update_cs_texture_surfaces,
1025 };
1026
1027
1028 void
1029 brw_upload_ubo_surfaces(struct brw_context *brw,
1030 struct gl_linked_shader *shader,
1031 struct brw_stage_state *stage_state,
1032 struct brw_stage_prog_data *prog_data)
1033 {
1034 struct gl_context *ctx = &brw->ctx;
1035
1036 if (!shader)
1037 return;
1038
1039 uint32_t *ubo_surf_offsets =
1040 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1041
1042 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1043 struct gl_uniform_buffer_binding *binding =
1044 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1045
1046 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1047 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1048 } else {
1049 struct intel_buffer_object *intel_bo =
1050 intel_buffer_object(binding->BufferObject);
1051 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1052 if (!binding->AutomaticSize)
1053 size = MIN2(size, binding->Size);
1054 drm_intel_bo *bo =
1055 intel_bufferobj_buffer(brw, intel_bo,
1056 binding->Offset,
1057 size);
1058 brw_create_constant_surface(brw, bo, binding->Offset,
1059 size,
1060 &ubo_surf_offsets[i]);
1061 }
1062 }
1063
1064 uint32_t *ssbo_surf_offsets =
1065 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1066
1067 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1068 struct gl_shader_storage_buffer_binding *binding =
1069 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1070
1071 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1072 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1073 } else {
1074 struct intel_buffer_object *intel_bo =
1075 intel_buffer_object(binding->BufferObject);
1076 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1077 if (!binding->AutomaticSize)
1078 size = MIN2(size, binding->Size);
1079 drm_intel_bo *bo =
1080 intel_bufferobj_buffer(brw, intel_bo,
1081 binding->Offset,
1082 size);
1083 brw_create_buffer_surface(brw, bo, binding->Offset,
1084 size,
1085 &ssbo_surf_offsets[i]);
1086 }
1087 }
1088
1089 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1090 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1091 }
1092
1093 static void
1094 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1095 {
1096 struct gl_context *ctx = &brw->ctx;
1097 /* _NEW_PROGRAM */
1098 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1099
1100 if (!prog)
1101 return;
1102
1103 /* BRW_NEW_FS_PROG_DATA */
1104 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1105 &brw->wm.base, &brw->wm.prog_data->base);
1106 }
1107
1108 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1109 .dirty = {
1110 .mesa = _NEW_PROGRAM,
1111 .brw = BRW_NEW_BATCH |
1112 BRW_NEW_BLORP |
1113 BRW_NEW_FS_PROG_DATA |
1114 BRW_NEW_UNIFORM_BUFFER,
1115 },
1116 .emit = brw_upload_wm_ubo_surfaces,
1117 };
1118
1119 static void
1120 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1121 {
1122 struct gl_context *ctx = &brw->ctx;
1123 /* _NEW_PROGRAM */
1124 struct gl_shader_program *prog =
1125 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1126
1127 if (!prog)
1128 return;
1129
1130 /* BRW_NEW_CS_PROG_DATA */
1131 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1132 &brw->cs.base, &brw->cs.prog_data->base);
1133 }
1134
1135 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1136 .dirty = {
1137 .mesa = _NEW_PROGRAM,
1138 .brw = BRW_NEW_BATCH |
1139 BRW_NEW_BLORP |
1140 BRW_NEW_CS_PROG_DATA |
1141 BRW_NEW_UNIFORM_BUFFER,
1142 },
1143 .emit = brw_upload_cs_ubo_surfaces,
1144 };
1145
1146 void
1147 brw_upload_abo_surfaces(struct brw_context *brw,
1148 struct gl_linked_shader *shader,
1149 struct brw_stage_state *stage_state,
1150 struct brw_stage_prog_data *prog_data)
1151 {
1152 struct gl_context *ctx = &brw->ctx;
1153 uint32_t *surf_offsets =
1154 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1155
1156 if (shader && shader->NumAtomicBuffers) {
1157 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1158 struct gl_atomic_buffer_binding *binding =
1159 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1160 struct intel_buffer_object *intel_bo =
1161 intel_buffer_object(binding->BufferObject);
1162 drm_intel_bo *bo = intel_bufferobj_buffer(
1163 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1164
1165 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1166 binding->Offset, BRW_SURFACEFORMAT_RAW,
1167 bo->size - binding->Offset, 1, true);
1168 }
1169
1170 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1171 }
1172 }
1173
1174 static void
1175 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1176 {
1177 struct gl_context *ctx = &brw->ctx;
1178 /* _NEW_PROGRAM */
1179 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1180
1181 if (prog) {
1182 /* BRW_NEW_FS_PROG_DATA */
1183 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1184 &brw->wm.base, &brw->wm.prog_data->base);
1185 }
1186 }
1187
1188 const struct brw_tracked_state brw_wm_abo_surfaces = {
1189 .dirty = {
1190 .mesa = _NEW_PROGRAM,
1191 .brw = BRW_NEW_ATOMIC_BUFFER |
1192 BRW_NEW_BLORP |
1193 BRW_NEW_BATCH |
1194 BRW_NEW_FS_PROG_DATA,
1195 },
1196 .emit = brw_upload_wm_abo_surfaces,
1197 };
1198
1199 static void
1200 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1201 {
1202 struct gl_context *ctx = &brw->ctx;
1203 /* _NEW_PROGRAM */
1204 struct gl_shader_program *prog =
1205 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1206
1207 if (prog) {
1208 /* BRW_NEW_CS_PROG_DATA */
1209 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1210 &brw->cs.base, &brw->cs.prog_data->base);
1211 }
1212 }
1213
1214 const struct brw_tracked_state brw_cs_abo_surfaces = {
1215 .dirty = {
1216 .mesa = _NEW_PROGRAM,
1217 .brw = BRW_NEW_ATOMIC_BUFFER |
1218 BRW_NEW_BLORP |
1219 BRW_NEW_BATCH |
1220 BRW_NEW_CS_PROG_DATA,
1221 },
1222 .emit = brw_upload_cs_abo_surfaces,
1223 };
1224
1225 static void
1226 brw_upload_cs_image_surfaces(struct brw_context *brw)
1227 {
1228 struct gl_context *ctx = &brw->ctx;
1229 /* _NEW_PROGRAM */
1230 struct gl_shader_program *prog =
1231 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1232
1233 if (prog) {
1234 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1235 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1236 &brw->cs.base, &brw->cs.prog_data->base);
1237 }
1238 }
1239
1240 const struct brw_tracked_state brw_cs_image_surfaces = {
1241 .dirty = {
1242 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1243 .brw = BRW_NEW_BATCH |
1244 BRW_NEW_BLORP |
1245 BRW_NEW_CS_PROG_DATA |
1246 BRW_NEW_IMAGE_UNITS
1247 },
1248 .emit = brw_upload_cs_image_surfaces,
1249 };
1250
1251 static uint32_t
1252 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1253 {
1254 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1255 uint32_t hw_format = brw_format_for_mesa_format(format);
1256 if (access == GL_WRITE_ONLY) {
1257 return hw_format;
1258 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1259 /* Typed surface reads support a very limited subset of the shader
1260 * image formats. Translate it into the closest format the
1261 * hardware supports.
1262 */
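/* The lowered format typically has fewer channels but the same size per
 * texel; the matching shader-side packing/unpacking is emitted by
 * brw_fs_surface_builder.cpp.
 */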
1263 return isl_lower_storage_image_format(devinfo, hw_format);
1264 } else {
1265 /* The hardware doesn't actually support a typed format that we can use
1266 * so we have to fall back to untyped read/write messages.
1267 */
1268 return BRW_SURFACEFORMAT_RAW;
1269 }
1270 }
1271
1272 static void
1273 update_default_image_param(struct brw_context *brw,
1274 struct gl_image_unit *u,
1275 unsigned surface_idx,
1276 struct brw_image_param *param)
1277 {
1278 memset(param, 0, sizeof(*param));
1279 param->surface_idx = surface_idx;
1280 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1281 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1282 * detailed explanation of these parameters.
1283 */
1284 param->swizzling[0] = 0xff;
1285 param->swizzling[1] = 0xff;
1286 }
1287
1288 static void
1289 update_buffer_image_param(struct brw_context *brw,
1290 struct gl_image_unit *u,
1291 unsigned surface_idx,
1292 struct brw_image_param *param)
1293 {
1294 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1295
1296 update_default_image_param(brw, u, surface_idx, param);
1297
1298 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1299 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1300 }
1301
1302 static void
1303 update_texture_image_param(struct brw_context *brw,
1304 struct gl_image_unit *u,
1305 unsigned surface_idx,
1306 struct brw_image_param *param)
1307 {
1308 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1309
1310 update_default_image_param(brw, u, surface_idx, param);
1311
1312 param->size[0] = minify(mt->logical_width0, u->Level);
1313 param->size[1] = minify(mt->logical_height0, u->Level);
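/* size[2] is the number of layers addressable by the shader: 1 for
 * non-layered bindings, 6 for cube maps, the minified depth for 3D
 * textures, and the full array length otherwise.
 */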
1314 param->size[2] = (!u->Layered ? 1 :
1315 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1316 u->TexObj->Target == GL_TEXTURE_3D ?
1317 minify(mt->logical_depth0, u->Level) :
1318 mt->logical_depth0);
1319
1320 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1321 &param->offset[0],
1322 &param->offset[1]);
1323
1324 param->stride[0] = mt->cpp;
1325 param->stride[1] = mt->pitch / mt->cpp;
1326 param->stride[2] =
1327 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1328 param->stride[3] =
1329 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1330
1331 if (mt->tiling == I915_TILING_X) {
1332 /* An X tile is a rectangular block of 512x8 bytes. */
1333 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1334 param->tiling[1] = _mesa_logbase2(8);
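/* For example, with a 4-byte texel this gives tiling[0] = log2(128) = 7
 * and tiling[1] = log2(8) = 3.
 */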
1335
1336 if (brw->has_swizzling) {
1337 /* Right shifts required to swizzle bits 9 and 10 of the memory
1338 * address with bit 6.
1339 */
1340 param->swizzling[0] = 3;
1341 param->swizzling[1] = 4;
1342 }
1343 } else if (mt->tiling == I915_TILING_Y) {
1344 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1345 * different from the layout of an X-tiled surface; we simply pretend that
1346 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1347 * one arranged in X-major order, just as is the case for X-tiling.
1348 */
1349 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1350 param->tiling[1] = _mesa_logbase2(32);
1351
1352 if (brw->has_swizzling) {
1353 /* Right shift required to swizzle bit 9 of the memory address with
1354 * bit 6.
1355 */
1356 param->swizzling[0] = 3;
1357 }
1358 }
1359
1360 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1361 * address calculation algorithm (emit_address_calculation() in
1362 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1363 * modulus equal to the LOD.
1364 */
1365 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1366 0);
1367 }
1368
1369 static void
1370 update_image_surface(struct brw_context *brw,
1371 struct gl_image_unit *u,
1372 GLenum access,
1373 unsigned surface_idx,
1374 uint32_t *surf_offset,
1375 struct brw_image_param *param)
1376 {
1377 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1378 struct gl_texture_object *obj = u->TexObj;
1379 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1380
1381 if (obj->Target == GL_TEXTURE_BUFFER) {
1382 struct intel_buffer_object *intel_obj =
1383 intel_buffer_object(obj->BufferObject);
1384 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1385 _mesa_get_format_bytes(u->_ActualFormat));
1386
1387 brw->vtbl.emit_buffer_surface_state(
1388 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1389 format, intel_obj->Base.Size / texel_size, texel_size,
1390 access != GL_READ_ONLY);
1391
1392 update_buffer_image_param(brw, u, surface_idx, param);
1393
1394 } else {
1395 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1396 struct intel_mipmap_tree *mt = intel_obj->mt;
1397
1398 if (format == BRW_SURFACEFORMAT_RAW) {
1399 brw->vtbl.emit_buffer_surface_state(
1400 brw, surf_offset, mt->bo, mt->offset,
1401 format, mt->bo->size - mt->offset, 1 /* pitch */,
1402 access != GL_READ_ONLY);
1403
1404 } else {
1405 const unsigned num_layers = (!u->Layered ? 1 :
1406 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1407 mt->logical_depth0);
1408
1409 struct isl_view view = {
1410 .format = format,
1411 .base_level = obj->MinLevel + u->Level,
1412 .levels = 1,
1413 .base_array_layer = obj->MinLayer + u->_Layer,
1414 .array_len = num_layers,
1415 .channel_select = {
1416 ISL_CHANNEL_SELECT_RED,
1417 ISL_CHANNEL_SELECT_GREEN,
1418 ISL_CHANNEL_SELECT_BLUE,
1419 ISL_CHANNEL_SELECT_ALPHA,
1420 },
1421 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1422 };
1423
1424 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1425
1426 brw_emit_surface_state(brw, mt, &view,
1427 surface_state_infos[brw->gen].rb_mocs, false,
1428 surf_offset, surf_index,
1429 I915_GEM_DOMAIN_SAMPLER,
1430 access == GL_READ_ONLY ? 0 :
1431 I915_GEM_DOMAIN_SAMPLER);
1432 }
1433
1434 update_texture_image_param(brw, u, surface_idx, param);
1435 }
1436
1437 } else {
1438 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1439 update_default_image_param(brw, u, surface_idx, param);
1440 }
1441 }
1442
1443 void
1444 brw_upload_image_surfaces(struct brw_context *brw,
1445 struct gl_linked_shader *shader,
1446 struct brw_stage_state *stage_state,
1447 struct brw_stage_prog_data *prog_data)
1448 {
1449 struct gl_context *ctx = &brw->ctx;
1450
1451 if (shader && shader->NumImages) {
1452 for (unsigned i = 0; i < shader->NumImages; i++) {
1453 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1454 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1455
1456 update_image_surface(brw, u, shader->ImageAccess[i],
1457 surf_idx,
1458 &stage_state->surf_offset[surf_idx],
1459 &prog_data->image_param[i]);
1460 }
1461
1462 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1463 /* This may have changed the image metadata that depends on the context
1464 * image unit state and is passed to the program as uniforms, so make sure
1465 * that push and pull constants are re-uploaded.
1466 */
1467 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1468 }
1469 }
1470
1471 static void
1472 brw_upload_wm_image_surfaces(struct brw_context *brw)
1473 {
1474 struct gl_context *ctx = &brw->ctx;
1475 /* BRW_NEW_FRAGMENT_PROGRAM */
1476 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1477
1478 if (prog) {
1479 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1480 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1481 &brw->wm.base, &brw->wm.prog_data->base);
1482 }
1483 }
1484
1485 const struct brw_tracked_state brw_wm_image_surfaces = {
1486 .dirty = {
1487 .mesa = _NEW_TEXTURE,
1488 .brw = BRW_NEW_BATCH |
1489 BRW_NEW_BLORP |
1490 BRW_NEW_FRAGMENT_PROGRAM |
1491 BRW_NEW_FS_PROG_DATA |
1492 BRW_NEW_IMAGE_UNITS
1493 },
1494 .emit = brw_upload_wm_image_surfaces,
1495 };
1496
1497 void
1498 gen4_init_vtable_surface_functions(struct brw_context *brw)
1499 {
1500 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1501 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1502 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1503 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1504 }
1505
1506 static void
1507 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1508 {
1509 struct gl_context *ctx = &brw->ctx;
1510 /* _NEW_PROGRAM */
1511 struct gl_shader_program *prog =
1512 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1513
1514 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1515 const unsigned surf_idx =
1516 brw->cs.prog_data->binding_table.work_groups_start;
1517 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1518 drm_intel_bo *bo;
1519 uint32_t bo_offset;
1520
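/* For a direct glDispatchCompute the work group counts are only available
 * in brw->compute.num_work_groups, so upload them to a buffer here; for
 * indirect dispatches they already live in num_work_groups_bo.
 */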
1521 if (brw->compute.num_work_groups_bo == NULL) {
1522 bo = NULL;
1523 intel_upload_data(brw,
1524 (void *)brw->compute.num_work_groups,
1525 3 * sizeof(GLuint),
1526 sizeof(GLuint),
1527 &bo,
1528 &bo_offset);
1529 } else {
1530 bo = brw->compute.num_work_groups_bo;
1531 bo_offset = brw->compute.num_work_groups_offset;
1532 }
1533
1534 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1535 bo, bo_offset,
1536 BRW_SURFACEFORMAT_RAW,
1537 3 * sizeof(GLuint), 1, true);
1538 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1539 }
1540 }
1541
1542 const struct brw_tracked_state brw_cs_work_groups_surface = {
1543 .dirty = {
1544 .brw = BRW_NEW_BLORP |
1545 BRW_NEW_CS_WORK_GROUPS
1546 },
1547 .emit = brw_upload_cs_work_groups_surface,
1548 };