i965/gen9: Add HiZ auxiliary buffer support
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 enum {
59 INTEL_RENDERBUFFER_LAYERED = 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED = 1 << 1,
61 };
62
63 struct surface_state_info {
64 unsigned num_dwords;
65 unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
66    unsigned reloc_dw;     /* DWORD that holds the surface base address (relocation target) */
67    unsigned aux_reloc_dw; /* DWORD that holds the auxiliary surface address */
68    unsigned tex_mocs;     /* MOCS value used when sampling from the surface */
69    unsigned rb_mocs;      /* MOCS value used when rendering to the surface */
70 };
71
72 static const struct surface_state_info surface_state_infos[] = {
73 [4] = {6, 32, 1, 0},
74 [5] = {6, 32, 1, 0},
75 [6] = {6, 32, 1, 0},
76 [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
77 [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
78 [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
79 };
80
81 static void
82 brw_emit_surface_state(struct brw_context *brw,
83 struct intel_mipmap_tree *mt, uint32_t flags,
84 GLenum target, struct isl_view view,
85 uint32_t mocs, uint32_t *surf_offset, int surf_index,
86 unsigned read_domains, unsigned write_domains)
87 {
88 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
89 uint32_t tile_x = mt->level[0].slice[0].x_offset;
90 uint32_t tile_y = mt->level[0].slice[0].y_offset;
91 uint32_t offset = mt->offset;
92
93 struct isl_surf surf;
94 intel_miptree_get_isl_surf(brw, mt, &surf);
95
96 surf.dim = get_isl_surf_dim(target);
97
98 const enum isl_dim_layout dim_layout =
99 get_isl_dim_layout(&brw->screen->devinfo, mt->tiling, target);
100
101 if (surf.dim_layout != dim_layout) {
102 /* The layout of the specified texture target is not compatible with the
103        * actual layout of the miptree structure in memory -- you're entering
104        * dangerous territory. This can only possibly work if you only intend
105 * to access a single level and slice of the texture, and the hardware
106 * supports the tile offset feature in order to allow non-tile-aligned
107 * base offsets, since we'll have to point the hardware to the first
108 * texel of the level instead of relying on the usual base level/layer
109 * controls.
110 */
111 assert(brw->has_surface_tile_offset);
112 assert(view.levels == 1 && view.array_len == 1);
113 assert(tile_x == 0 && tile_y == 0);
114
115 offset += intel_miptree_get_tile_offsets(mt, view.base_level,
116 view.base_array_layer,
117 &tile_x, &tile_y);
118
119 /* Minify the logical dimensions of the texture. */
120 const unsigned l = view.base_level - mt->first_level;
121 surf.logical_level0_px.width = minify(surf.logical_level0_px.width, l);
122 surf.logical_level0_px.height = surf.dim <= ISL_SURF_DIM_1D ? 1 :
123 minify(surf.logical_level0_px.height, l);
124 surf.logical_level0_px.depth = surf.dim <= ISL_SURF_DIM_2D ? 1 :
125 minify(surf.logical_level0_px.depth, l);
126
127 /* Only the base level and layer can be addressed with the overridden
128 * layout.
129 */
130 surf.logical_level0_px.array_len = 1;
131 surf.levels = 1;
132 surf.dim_layout = dim_layout;
133
134 /* The requested slice of the texture is now at the base level and
135 * layer.
136 */
137 view.base_level = 0;
138 view.base_array_layer = 0;
139 }
140
141 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
142
143 drm_intel_bo *aux_bo;
144 struct isl_surf *aux_surf = NULL, aux_surf_s;
145 uint64_t aux_offset = 0;
146 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
147 if ((mt->mcs_buf || intel_miptree_sample_with_hiz(brw, mt)) &&
148 !(flags & INTEL_AUX_BUFFER_DISABLED)) {
149 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
150 aux_surf = &aux_surf_s;
151
152 if (mt->mcs_buf) {
153 assert(mt->mcs_buf->offset == 0);
154 aux_bo = mt->mcs_buf->bo;
155 aux_offset = mt->mcs_buf->bo->offset64;
156 } else {
157 aux_bo = mt->hiz_buf->aux_base.bo;
158 aux_offset = mt->hiz_buf->aux_base.bo->offset64;
159 }
160
161 /* We only really need a clear color if we also have an auxiliary
162 * surface. Without one, it does nothing.
163 */
164 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
165 }
166
167 uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
168 ss_info.num_dwords * 4, ss_info.ss_align,
169 surf_index, surf_offset);
170
171 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = &view,
172 .address = mt->bo->offset64 + offset,
173 .aux_surf = aux_surf, .aux_usage = aux_usage,
174 .aux_address = aux_offset,
175 .mocs = mocs, .clear_color = clear_color,
176 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
177
178 drm_intel_bo_emit_reloc(brw->batch.bo,
179 *surf_offset + 4 * ss_info.reloc_dw,
180 mt->bo, offset,
181 read_domains, write_domains);
182
183 if (aux_surf) {
184 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
185 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
186 * contain other control information. Since buffer addresses are always
187 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
188 * an ordinary reloc to do the necessary address translation.
189 */
190 assert((aux_offset & 0xfff) == 0);
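      /* Pass the control bits kept in the low 12 bits of the DWORD as the
       * reloc delta, so they are preserved when the final aux buffer address
       * is patched in.
       */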
191 drm_intel_bo_emit_reloc(brw->batch.bo,
192 *surf_offset + 4 * ss_info.aux_reloc_dw,
193 aux_bo, dw[ss_info.aux_reloc_dw] & 0xfff,
194 read_domains, write_domains);
195 }
196 }
197
198 uint32_t
199 brw_update_renderbuffer_surface(struct brw_context *brw,
200 struct gl_renderbuffer *rb,
201 uint32_t flags, unsigned unit /* unused */,
202 uint32_t surf_index)
203 {
204 struct gl_context *ctx = &brw->ctx;
205 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
206 struct intel_mipmap_tree *mt = irb->mt;
207
208 if (brw->gen < 9) {
209 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
210 }
211
212 assert(brw_render_target_supported(brw, rb));
213 intel_miptree_used_for_rendering(mt);
214
215 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
216 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
217 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
218 __func__, _mesa_get_format_name(rb_format));
219 }
220
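   /* intel_renderbuffer::mt_layer is expressed in sample units for the UMS
    * and CMS multisample layouts, so convert it back into whole logical
    * layers when computing the base array layer below.
    */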
221 const unsigned layer_multiplier =
222 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
223 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
224 MAX2(irb->mt->num_samples, 1) : 1;
225
226 struct isl_view view = {
227 .format = brw->render_target_format[rb_format],
228 .base_level = irb->mt_level - irb->mt->first_level,
229 .levels = 1,
230 .base_array_layer = irb->mt_layer / layer_multiplier,
231 .array_len = MAX2(irb->layer_count, 1),
232 .swizzle = ISL_SWIZZLE_IDENTITY,
233 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
234 };
235
236 uint32_t offset;
237 brw_emit_surface_state(brw, mt, flags, mt->target, view,
238 surface_state_infos[brw->gen].rb_mocs,
239 &offset, surf_index,
240 I915_GEM_DOMAIN_RENDER,
241 I915_GEM_DOMAIN_RENDER);
242 return offset;
243 }
244
245 GLuint
246 translate_tex_target(GLenum target)
247 {
248 switch (target) {
249 case GL_TEXTURE_1D:
250 case GL_TEXTURE_1D_ARRAY_EXT:
251 return BRW_SURFACE_1D;
252
253 case GL_TEXTURE_RECTANGLE_NV:
254 return BRW_SURFACE_2D;
255
256 case GL_TEXTURE_2D:
257 case GL_TEXTURE_2D_ARRAY_EXT:
258 case GL_TEXTURE_EXTERNAL_OES:
259 case GL_TEXTURE_2D_MULTISAMPLE:
260 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
261 return BRW_SURFACE_2D;
262
263 case GL_TEXTURE_3D:
264 return BRW_SURFACE_3D;
265
266 case GL_TEXTURE_CUBE_MAP:
267 case GL_TEXTURE_CUBE_MAP_ARRAY:
268 return BRW_SURFACE_CUBE;
269
270 default:
271 unreachable("not reached");
272 }
273 }
274
275 uint32_t
276 brw_get_surface_tiling_bits(uint32_t tiling)
277 {
278 switch (tiling) {
279 case I915_TILING_X:
280 return BRW_SURFACE_TILED;
281 case I915_TILING_Y:
282 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
283 default:
284 return 0;
285 }
286 }
287
288
289 uint32_t
290 brw_get_surface_num_multisamples(unsigned num_samples)
291 {
292 if (num_samples > 1)
293 return BRW_SURFACE_MULTISAMPLECOUNT_4;
294 else
295 return BRW_SURFACE_MULTISAMPLECOUNT_1;
296 }
297
298 /**
299 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
300 * swizzling.
301 */
302 int
303 brw_get_texture_swizzle(const struct gl_context *ctx,
304 const struct gl_texture_object *t)
305 {
306 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
307
308 int swizzles[SWIZZLE_NIL + 1] = {
309 SWIZZLE_X,
310 SWIZZLE_Y,
311 SWIZZLE_Z,
312 SWIZZLE_W,
313 SWIZZLE_ZERO,
314 SWIZZLE_ONE,
315 SWIZZLE_NIL
316 };
317
318 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
319 img->_BaseFormat == GL_DEPTH_STENCIL) {
320 GLenum depth_mode = t->DepthMode;
321
322 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
323 * with depth component data specified with a sized internal format.
324 * Otherwise, it's left at the old default, GL_LUMINANCE.
325 */
326 if (_mesa_is_gles3(ctx) &&
327 img->InternalFormat != GL_DEPTH_COMPONENT &&
328 img->InternalFormat != GL_DEPTH_STENCIL) {
329 depth_mode = GL_RED;
330 }
331
332 switch (depth_mode) {
333 case GL_ALPHA:
334 swizzles[0] = SWIZZLE_ZERO;
335 swizzles[1] = SWIZZLE_ZERO;
336 swizzles[2] = SWIZZLE_ZERO;
337 swizzles[3] = SWIZZLE_X;
338 break;
339 case GL_LUMINANCE:
340 swizzles[0] = SWIZZLE_X;
341 swizzles[1] = SWIZZLE_X;
342 swizzles[2] = SWIZZLE_X;
343 swizzles[3] = SWIZZLE_ONE;
344 break;
345 case GL_INTENSITY:
346 swizzles[0] = SWIZZLE_X;
347 swizzles[1] = SWIZZLE_X;
348 swizzles[2] = SWIZZLE_X;
349 swizzles[3] = SWIZZLE_X;
350 break;
351 case GL_RED:
352 swizzles[0] = SWIZZLE_X;
353 swizzles[1] = SWIZZLE_ZERO;
354 swizzles[2] = SWIZZLE_ZERO;
355 swizzles[3] = SWIZZLE_ONE;
356 break;
357 }
358 }
359
360 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
361
362 /* If the texture's format is alpha-only, force R, G, and B to
363 * 0.0. Similarly, if the texture's format has no alpha channel,
364     * force the alpha value read to 1.0. This allows the
365     * implementation to use an RGBA texture for any of these formats
366 * without leaking any unexpected values.
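    * For example, a GL_ALPHA texture backed by an RGBA surface reads back
    * (0, 0, 0, A).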
367 */
368 switch (img->_BaseFormat) {
369 case GL_ALPHA:
370 swizzles[0] = SWIZZLE_ZERO;
371 swizzles[1] = SWIZZLE_ZERO;
372 swizzles[2] = SWIZZLE_ZERO;
373 break;
374 case GL_LUMINANCE:
375 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
376 swizzles[0] = SWIZZLE_X;
377 swizzles[1] = SWIZZLE_X;
378 swizzles[2] = SWIZZLE_X;
379 swizzles[3] = SWIZZLE_ONE;
380 }
381 break;
382 case GL_LUMINANCE_ALPHA:
383 if (datatype == GL_SIGNED_NORMALIZED) {
384 swizzles[0] = SWIZZLE_X;
385 swizzles[1] = SWIZZLE_X;
386 swizzles[2] = SWIZZLE_X;
387 swizzles[3] = SWIZZLE_W;
388 }
389 break;
390 case GL_INTENSITY:
391 if (datatype == GL_SIGNED_NORMALIZED) {
392 swizzles[0] = SWIZZLE_X;
393 swizzles[1] = SWIZZLE_X;
394 swizzles[2] = SWIZZLE_X;
395 swizzles[3] = SWIZZLE_X;
396 }
397 break;
398 case GL_RED:
399 case GL_RG:
400 case GL_RGB:
401 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
402 swizzles[3] = SWIZZLE_ONE;
403 break;
404 }
405
406 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
407 swizzles[GET_SWZ(t->_Swizzle, 1)],
408 swizzles[GET_SWZ(t->_Swizzle, 2)],
409 swizzles[GET_SWZ(t->_Swizzle, 3)]);
410 }
411
412 /**
413    * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
414    * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
415 *
416 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
417 * 0 1 2 3 4 5
418 * 4 5 6 7 0 1
419 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
420 *
421 * which is simply adding 4 then modding by 8 (or anding with 7).
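    *
    * For example, SWIZZLE_X (0) maps to (0 + 4) & 7 = 4 = SCS_RED, and
    * SWIZZLE_ZERO (4) maps to (4 + 4) & 7 = 0 = SCS_ZERO.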
422 *
423 * We then may need to apply workarounds for textureGather hardware bugs.
424 */
425 static unsigned
426 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
427 {
428 unsigned scs = (swizzle + 4) & 7;
429
430 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
431 }
432
433 static unsigned
434 brw_find_matching_rb(const struct gl_framebuffer *fb,
435 const struct intel_mipmap_tree *mt)
436 {
437 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
438 const struct intel_renderbuffer *irb =
439 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
440
441 if (irb && irb->mt == mt)
442 return i;
443 }
444
445 return fb->_NumColorDrawBuffers;
446 }
447
448 static inline bool
449 brw_texture_view_sane(const struct brw_context *brw,
450 const struct intel_mipmap_tree *mt, unsigned format)
451 {
452 /* There are special cases only for lossless compression. */
453 if (!intel_miptree_is_lossless_compressed(brw, mt))
454 return true;
455
456 if (isl_format_supports_lossless_compression(&brw->screen->devinfo,
457 format))
458 return true;
459
460 /* Logic elsewhere needs to take care to resolve the color buffer prior
461 * to sampling it as non-compressed.
462 */
463 if (mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)
464 return false;
465
466 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
467 const unsigned rb_index = brw_find_matching_rb(fb, mt);
468
469 if (rb_index == fb->_NumColorDrawBuffers)
470 return true;
471
472 /* Underlying surface is compressed but it is sampled using a format that
473 * the sampling engine doesn't support as compressed. Compression must be
474     * disabled for both the sampling engine and the data port in case the same
475     * surface is also used as a render target.
476 */
477 return brw->draw_aux_buffer_disabled[rb_index];
478 }
479
480 static bool
481 brw_disable_aux_surface(const struct brw_context *brw,
482 const struct intel_mipmap_tree *mt)
483 {
484 /* Nothing to disable. */
485 if (!mt->mcs_buf)
486 return false;
487
488 /* There are special cases only for lossless compression. */
489 if (!intel_miptree_is_lossless_compressed(brw, mt))
490 return mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED;
491
492 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
493 const unsigned rb_index = brw_find_matching_rb(fb, mt);
494
495 /* If we are drawing into this with compression enabled, then we must also
496 * enable compression when texturing from it regardless of
497     * fast_clear_state. If we don't, then after the first draw call with
498 * this setup, there will be data in the CCS which won't get picked up by
499 * subsequent texturing operations as required by ARB_texture_barrier.
500 * Since we don't want to re-emit the binding table or do a resolve
501 * operation every draw call, the easiest thing to do is just enable
502 * compression on the texturing side. This is completely safe to do
503 * since, if compressed texturing weren't allowed, we would have disabled
504 * compression of render targets in whatever_that_function_is_called().
505 */
506 if (rb_index < fb->_NumColorDrawBuffers) {
507 if (brw->draw_aux_buffer_disabled[rb_index]) {
508 assert(mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED);
509 }
510
511 return brw->draw_aux_buffer_disabled[rb_index];
512 }
513
514 return mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED;
515 }
516
517 void
518 brw_update_texture_surface(struct gl_context *ctx,
519 unsigned unit,
520 uint32_t *surf_offset,
521 bool for_gather,
522 uint32_t plane)
523 {
524 struct brw_context *brw = brw_context(ctx);
525 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
526
527 if (obj->Target == GL_TEXTURE_BUFFER) {
528 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
529
530 } else {
531 struct intel_texture_object *intel_obj = intel_texture_object(obj);
532 struct intel_mipmap_tree *mt = intel_obj->mt;
533
534 if (plane > 0) {
535 if (mt->plane[plane - 1] == NULL)
536 return;
537 mt = mt->plane[plane - 1];
538 }
539
540 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
541 /* If this is a view with restricted NumLayers, then our effective depth
542 * is not just the miptree depth.
543 */
544 const unsigned view_num_layers =
545 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
546 mt->logical_depth0;
547
548 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
549 * texturing functions that return a float, as our code generation always
550 * selects the .x channel (which would always be 0).
551 */
552 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
553 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
554 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
555 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
556 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
557 brw_get_texture_swizzle(&brw->ctx, obj));
558
559 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
560 unsigned format = translate_tex_format(brw, mesa_fmt,
561 sampler->sRGBDecode);
562
563 /* Implement gen6 and gen7 gather work-around */
564 bool need_green_to_blue = false;
565 if (for_gather) {
566 if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
567 format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
568 need_green_to_blue = brw->is_haswell;
569 } else if (brw->gen == 6) {
570 /* Sandybridge's gather4 message is broken for integer formats.
571 * To work around this, we pretend the surface is UNORM for
572 * 8 or 16-bit formats, and emit shader instructions to recover
573 * the real INT/UINT value. For 32-bit formats, we pretend
574 * the surface is FLOAT, and simply reinterpret the resulting
575 * bits.
576 */
577 switch (format) {
578 case BRW_SURFACEFORMAT_R8_SINT:
579 case BRW_SURFACEFORMAT_R8_UINT:
580 format = BRW_SURFACEFORMAT_R8_UNORM;
581 break;
582
583 case BRW_SURFACEFORMAT_R16_SINT:
584 case BRW_SURFACEFORMAT_R16_UINT:
585 format = BRW_SURFACEFORMAT_R16_UNORM;
586 break;
587
588 case BRW_SURFACEFORMAT_R32_SINT:
589 case BRW_SURFACEFORMAT_R32_UINT:
590 format = BRW_SURFACEFORMAT_R32_FLOAT;
591 break;
592
593 default:
594 break;
595 }
596 }
597 }
598
599 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
600 if (brw->gen <= 7) {
601 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
602 mt = mt->r8stencil_mt;
603 } else {
604 mt = mt->stencil_mt;
605 }
606 format = BRW_SURFACEFORMAT_R8_UINT;
607 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
608 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
609 mt = mt->r8stencil_mt;
610 format = BRW_SURFACEFORMAT_R8_UINT;
611 }
612
613 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
614
615 struct isl_view view = {
616 .format = format,
617 .base_level = obj->MinLevel + obj->BaseLevel,
618 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
619 .base_array_layer = obj->MinLayer,
620 .array_len = view_num_layers,
621 .swizzle = {
622 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
623 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
624 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
625 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
626 },
627 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
628 };
629
630 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
631 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
632 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
633
634 assert(brw_texture_view_sane(brw, mt, format));
635
636 const int flags =
637 brw_disable_aux_surface(brw, mt) ? INTEL_AUX_BUFFER_DISABLED : 0;
638 brw_emit_surface_state(brw, mt, flags, mt->target, view,
639 surface_state_infos[brw->gen].tex_mocs,
640 surf_offset, surf_index,
641 I915_GEM_DOMAIN_SAMPLER, 0);
642 }
643 }
644
645 void
646 brw_emit_buffer_surface_state(struct brw_context *brw,
647 uint32_t *out_offset,
648 drm_intel_bo *bo,
649 unsigned buffer_offset,
650 unsigned surface_format,
651 unsigned buffer_size,
652 unsigned pitch,
653 bool rw)
654 {
655 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
656
657 uint32_t *dw = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
658 ss_info.num_dwords * 4, ss_info.ss_align,
659 out_offset);
660
661 isl_buffer_fill_state(&brw->isl_dev, dw,
662 .address = (bo ? bo->offset64 : 0) + buffer_offset,
663 .size = buffer_size,
664 .format = surface_format,
665 .stride = pitch,
666 .mocs = ss_info.tex_mocs);
667
668 if (bo) {
669 drm_intel_bo_emit_reloc(brw->batch.bo,
670 *out_offset + 4 * ss_info.reloc_dw,
671 bo, buffer_offset,
672 I915_GEM_DOMAIN_SAMPLER,
673 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
674 }
675 }
676
677 void
678 brw_update_buffer_texture_surface(struct gl_context *ctx,
679 unsigned unit,
680 uint32_t *surf_offset)
681 {
682 struct brw_context *brw = brw_context(ctx);
683 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
684 struct intel_buffer_object *intel_obj =
685 intel_buffer_object(tObj->BufferObject);
686 uint32_t size = tObj->BufferSize;
687 drm_intel_bo *bo = NULL;
688 mesa_format format = tObj->_BufferObjectFormat;
689 uint32_t brw_format = brw_format_for_mesa_format(format);
690 int texel_size = _mesa_get_format_bytes(format);
691
692 if (intel_obj) {
693 size = MIN2(size, intel_obj->Base.Size);
694 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
695 }
696
697 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
698 _mesa_problem(NULL, "bad format %s for texture buffer\n",
699 _mesa_get_format_name(format));
700 }
701
702 brw_emit_buffer_surface_state(brw, surf_offset, bo,
703 tObj->BufferOffset,
704 brw_format,
705 size,
706 texel_size,
707 false /* rw */);
708 }
709
710 /**
711 * Create the constant buffer surface. Vertex/fragment shader constants will be
712 * read from this buffer with Data Port Read instructions/messages.
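  * The buffer is exposed as an R32G32B32A32_FLOAT surface with a pitch of 1,
  * i.e. one vec4 per constant slot.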
713 */
714 void
715 brw_create_constant_surface(struct brw_context *brw,
716 drm_intel_bo *bo,
717 uint32_t offset,
718 uint32_t size,
719 uint32_t *out_offset)
720 {
721 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
722 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
723 size, 1, false);
724 }
725
726 /**
727 * Create the buffer surface. Shader buffer variables will be
728 * read from / write to this buffer with Data Port Read/Write
729 * instructions/messages.
730 */
731 void
732 brw_create_buffer_surface(struct brw_context *brw,
733 drm_intel_bo *bo,
734 uint32_t offset,
735 uint32_t size,
736 uint32_t *out_offset)
737 {
738 /* Use a raw surface so we can reuse existing untyped read/write/atomic
739 * messages. We need these specifically for the fragment shader since they
740     * include a pixel mask header that is needed to ensure correct behavior
741     * with helper invocations, which cannot write to the buffer.
742 */
743 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
744 BRW_SURFACEFORMAT_RAW,
745 size, 1, true);
746 }
747
748 /**
749 * Set up a binding table entry for use by stream output logic (transform
750 * feedback).
751 *
752 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
753 */
754 void
755 brw_update_sol_surface(struct brw_context *brw,
756 struct gl_buffer_object *buffer_obj,
757 uint32_t *out_offset, unsigned num_vector_components,
758 unsigned stride_dwords, unsigned offset_dwords)
759 {
760 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
761 uint32_t offset_bytes = 4 * offset_dwords;
762 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
763 offset_bytes,
764 buffer_obj->Size - offset_bytes);
765 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
766 out_offset);
767 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
768 size_t size_dwords = buffer_obj->Size / 4;
769 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
770
771 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
772 * too big to map using a single binding table entry?
773 */
774 assert((size_dwords - offset_dwords) / stride_dwords
775 <= BRW_MAX_NUM_BUFFER_ENTRIES);
776
777 if (size_dwords > offset_dwords + num_vector_components) {
778 /* There is room for at least 1 transform feedback output in the buffer.
779 * Compute the number of additional transform feedback outputs the
780 * buffer has room for.
781 */
782 buffer_size_minus_1 =
783 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
784 } else {
785 /* There isn't even room for a single transform feedback output in the
786 * buffer. We can't configure the binding table entry to prevent output
787 * entirely; we'll have to rely on the geometry shader to detect
788 * overflow. But to minimize the damage in case of a bug, set up the
789 * binding table entry to just allow a single output.
790 */
791 buffer_size_minus_1 = 0;
792 }
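   /* buffer_size_minus_1 is split across the Width (bits 6:0), Height
    * (bits 19:7), and Depth (bits 26:20) fields of the legacy SURFACE_STATE
    * below.
    */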
793 width = buffer_size_minus_1 & 0x7f;
794 height = (buffer_size_minus_1 & 0xfff80) >> 7;
795 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
796
797 switch (num_vector_components) {
798 case 1:
799 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
800 break;
801 case 2:
802 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
803 break;
804 case 3:
805 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
806 break;
807 case 4:
808 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
809 break;
810 default:
811 unreachable("Invalid vector size for transform feedback output");
812 }
813
814 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
815 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
816 surface_format << BRW_SURFACE_FORMAT_SHIFT |
817 BRW_SURFACE_RC_READ_WRITE;
818 surf[1] = bo->offset64 + offset_bytes; /* reloc */
819 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
820 height << BRW_SURFACE_HEIGHT_SHIFT);
821 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
822 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
823 surf[4] = 0;
824 surf[5] = 0;
825
826 /* Emit relocation to surface contents. */
827 drm_intel_bo_emit_reloc(brw->batch.bo,
828 *out_offset + 4,
829 bo, offset_bytes,
830 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
831 }
832
833 /* Creates a new WM constant buffer reflecting the current fragment program's
834 * constants, if needed by the fragment program.
835 *
836 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
837 * state atom.
838 */
839 static void
840 brw_upload_wm_pull_constants(struct brw_context *brw)
841 {
842 struct brw_stage_state *stage_state = &brw->wm.base;
843 /* BRW_NEW_FRAGMENT_PROGRAM */
844 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
845 /* BRW_NEW_FS_PROG_DATA */
846 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
847
848 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
849 /* _NEW_PROGRAM_CONSTANTS */
850 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
851 stage_state, prog_data);
852 }
853
854 const struct brw_tracked_state brw_wm_pull_constants = {
855 .dirty = {
856 .mesa = _NEW_PROGRAM_CONSTANTS,
857 .brw = BRW_NEW_BATCH |
858 BRW_NEW_BLORP |
859 BRW_NEW_FRAGMENT_PROGRAM |
860 BRW_NEW_FS_PROG_DATA,
861 },
862 .emit = brw_upload_wm_pull_constants,
863 };
864
865 /**
866 * Creates a null renderbuffer surface.
867 *
868 * This is used when the shader doesn't write to any color output. An FB
869 * write to target 0 will still be emitted, because that's how the thread is
870 * terminated (and computed depth is returned), so we need to have the
871   * hardware discard the target 0 color output.
872 */
873 static void
874 brw_emit_null_surface_state(struct brw_context *brw,
875 unsigned width,
876 unsigned height,
877 unsigned samples,
878 uint32_t *out_offset)
879 {
880 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
881 * Notes):
882 *
883 * A null surface will be used in instances where an actual surface is
884 * not bound. When a write message is generated to a null surface, no
885 * actual surface is written to. When a read message (including any
886 * sampling engine message) is generated to a null surface, the result
887 * is all zeros. Note that a null surface type is allowed to be used
888    *     with all messages, even if it is not specifically indicated as
889 * supported. All of the remaining fields in surface state are ignored
890 * for null surfaces, with the following exceptions:
891 *
892 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
893 * depth buffer’s corresponding state for all render target surfaces,
894 * including null.
895 *
896 * - Surface Format must be R8G8B8A8_UNORM.
897 */
898 unsigned surface_type = BRW_SURFACE_NULL;
899 drm_intel_bo *bo = NULL;
900 unsigned pitch_minus_1 = 0;
901 uint32_t multisampling_state = 0;
902 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
903 out_offset);
904
905 if (samples > 1) {
906 /* On Gen6, null render targets seem to cause GPU hangs when
907        * multisampling. So work around this problem by rendering into a dummy
908 * color buffer.
909 *
910 * To decrease the amount of memory needed by the workaround buffer, we
911 * set its pitch to 128 bytes (the width of a Y tile). This means that
912 * the amount of memory needed for the workaround buffer is
913 * (width_in_tiles + height_in_tiles - 1) tiles.
914 *
915 * Note that since the workaround buffer will be interpreted by the
916 * hardware as an interleaved multisampled buffer, we need to compute
917 * width_in_tiles and height_in_tiles by dividing the width and height
918 * by 16 rather than the normal Y-tile size of 32.
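       *
       * For example, a 1920x1080 multisampled target needs
       * (120 + 68 - 1) * 4 KiB = 748 KiB for the workaround buffer.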
919 */
920 unsigned width_in_tiles = ALIGN(width, 16) / 16;
921 unsigned height_in_tiles = ALIGN(height, 16) / 16;
922 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
923 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
924 size_needed);
925 bo = brw->wm.multisampled_null_render_target_bo;
926 surface_type = BRW_SURFACE_2D;
927 pitch_minus_1 = 127;
928 multisampling_state = brw_get_surface_num_multisamples(samples);
929 }
930
931 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
932 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
933 if (brw->gen < 6) {
934 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
935 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
936 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
937 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
938 }
939 surf[1] = bo ? bo->offset64 : 0;
940 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
941 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
942
943 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
944 * Notes):
945 *
946 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
947 */
948 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
949 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
950 surf[4] = multisampling_state;
951 surf[5] = 0;
952
953 if (bo) {
954 drm_intel_bo_emit_reloc(brw->batch.bo,
955 *out_offset + 4,
956 bo, 0,
957 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
958 }
959 }
960
961 /**
962 * Sets up a surface state structure to point at the given region.
963 * While it is only used for the front/back buffer currently, it should be
964   * usable for further buffers when doing ARB_draw_buffers support.
965 */
966 static uint32_t
967 gen4_update_renderbuffer_surface(struct brw_context *brw,
968 struct gl_renderbuffer *rb,
969 uint32_t flags, unsigned unit,
970 uint32_t surf_index)
971 {
972 struct gl_context *ctx = &brw->ctx;
973 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
974 struct intel_mipmap_tree *mt = irb->mt;
975 uint32_t *surf;
976 uint32_t tile_x, tile_y;
977 uint32_t format = 0;
978 uint32_t offset;
979 /* _NEW_BUFFERS */
980 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
981 /* BRW_NEW_FS_PROG_DATA */
982
983 assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
984 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
985
986 if (rb->TexImage && !brw->has_surface_tile_offset) {
987 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
988
989 if (tile_x != 0 || tile_y != 0) {
990 /* Original gen4 hardware couldn't draw to a non-tile-aligned
991           * destination in a miptree unless you actually set up your renderbuffer
992 * as a miptree and used the fragile lod/array_index/etc. controls to
993 * select the image. So, instead, we just make a new single-level
994 * miptree and render into that.
995 */
996 intel_renderbuffer_move_to_temp(brw, irb, false);
997 mt = irb->mt;
998 }
999 }
1000
1001 intel_miptree_used_for_rendering(irb->mt);
1002
1003 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
1004
1005 format = brw->render_target_format[rb_format];
1006 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
1007 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
1008 __func__, _mesa_get_format_name(rb_format));
1009 }
1010
1011 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
1012 format << BRW_SURFACE_FORMAT_SHIFT);
1013
1014 /* reloc */
1015 assert(mt->offset % mt->cpp == 0);
1016 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
1017 mt->bo->offset64 + mt->offset);
1018
1019 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1020 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
1021
1022 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
1023 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
1024
1025 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
1026
1027 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
1028 /* Note that the low bits of these fields are missing, so
1029 * there's the possibility of getting in trouble.
1030 */
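   /* The hardware X offset field is in units of 4 pixels and the Y offset
    * field in units of 2 rows, hence the asserts and divisions below.
    */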
1031 assert(tile_x % 4 == 0);
1032 assert(tile_y % 2 == 0);
1033 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
1034 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
1035 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1036
1037 if (brw->gen < 6) {
1038 /* _NEW_COLOR */
1039 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1040 (ctx->Color.BlendEnabled & (1 << unit)))
1041 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1042
1043 if (!ctx->Color.ColorMask[unit][0])
1044 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1045 if (!ctx->Color.ColorMask[unit][1])
1046 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1047 if (!ctx->Color.ColorMask[unit][2])
1048 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1049
1050 /* As mentioned above, disable writes to the alpha component when the
1051 * renderbuffer is XRGB.
1052 */
1053 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1054 !ctx->Color.ColorMask[unit][3]) {
1055 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1056 }
1057 }
1058
1059 drm_intel_bo_emit_reloc(brw->batch.bo,
1060 offset + 4,
1061 mt->bo,
1062 surf[1] - mt->bo->offset64,
1063 I915_GEM_DOMAIN_RENDER,
1064 I915_GEM_DOMAIN_RENDER);
1065
1066 return offset;
1067 }
1068
1069 /**
1070 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1071 */
1072 void
1073 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1074 const struct gl_framebuffer *fb,
1075 uint32_t render_target_start,
1076 uint32_t *surf_offset)
1077 {
1078 GLuint i;
1079 const unsigned int w = _mesa_geometric_width(fb);
1080 const unsigned int h = _mesa_geometric_height(fb);
1081 const unsigned int s = _mesa_geometric_samples(fb);
1082
1083 /* Update surfaces for drawing buffers */
1084 if (fb->_NumColorDrawBuffers >= 1) {
1085 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1086 const uint32_t surf_index = render_target_start + i;
1087 const int flags = (_mesa_geometric_layers(fb) > 0 ?
1088 INTEL_RENDERBUFFER_LAYERED : 0) |
1089 (brw->draw_aux_buffer_disabled[i] ?
1090 INTEL_AUX_BUFFER_DISABLED : 0);
1091
1092 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1093 surf_offset[surf_index] =
1094 brw->vtbl.update_renderbuffer_surface(
1095 brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1096 } else {
1097 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1098 &surf_offset[surf_index]);
1099 }
1100 }
1101 } else {
1102 const uint32_t surf_index = render_target_start;
1103 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1104 &surf_offset[surf_index]);
1105 }
1106 }
1107
1108 static void
1109 update_renderbuffer_surfaces(struct brw_context *brw)
1110 {
1111 const struct gl_context *ctx = &brw->ctx;
1112
1113 /* BRW_NEW_FS_PROG_DATA */
1114 const struct brw_wm_prog_data *wm_prog_data =
1115 brw_wm_prog_data(brw->wm.base.prog_data);
1116
1117 /* _NEW_BUFFERS | _NEW_COLOR */
1118 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1119 brw_update_renderbuffer_surfaces(
1120 brw, fb,
1121 wm_prog_data->binding_table.render_target_start,
1122 brw->wm.base.surf_offset);
1123 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1124 }
1125
1126 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1127 .dirty = {
1128 .mesa = _NEW_BUFFERS |
1129 _NEW_COLOR,
1130 .brw = BRW_NEW_BATCH |
1131 BRW_NEW_BLORP |
1132 BRW_NEW_FS_PROG_DATA,
1133 },
1134 .emit = update_renderbuffer_surfaces,
1135 };
1136
1137 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1138 .dirty = {
1139 .mesa = _NEW_BUFFERS,
1140 .brw = BRW_NEW_BATCH |
1141 BRW_NEW_BLORP,
1142 },
1143 .emit = update_renderbuffer_surfaces,
1144 };
1145
1146 static void
1147 update_renderbuffer_read_surfaces(struct brw_context *brw)
1148 {
1149 const struct gl_context *ctx = &brw->ctx;
1150
1151 /* BRW_NEW_FS_PROG_DATA */
1152 const struct brw_wm_prog_data *wm_prog_data =
1153 brw_wm_prog_data(brw->wm.base.prog_data);
1154
1155 /* BRW_NEW_FRAGMENT_PROGRAM */
1156 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1157 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1158 /* _NEW_BUFFERS */
1159 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1160
1161 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1162 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1163 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1164 const unsigned surf_index =
1165 wm_prog_data->binding_table.render_target_read_start + i;
1166 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1167
1168 if (irb) {
1169 const unsigned format = brw->render_target_format[
1170 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1171 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1172 format));
1173
1174 /* Override the target of the texture if the render buffer is a
1175 * single slice of a 3D texture (since the minimum array element
1176 * field of the surface state structure is ignored by the sampler
1177 * unit for 3D textures on some hardware), or if the render buffer
1178 * is a 1D array (since shaders always provide the array index
1179 * coordinate at the Z component to avoid state-dependent
1180 * recompiles when changing the texture target of the
1181 * framebuffer).
1182 */
1183 const GLenum target =
1184 (irb->mt->target == GL_TEXTURE_3D &&
1185 irb->layer_count == 1) ? GL_TEXTURE_2D :
1186 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1187 irb->mt->target;
1188
1189 /* intel_renderbuffer::mt_layer is expressed in sample units for
1190 * the UMS and CMS multisample layouts, but
1191 * intel_renderbuffer::layer_count is expressed in units of whole
1192 * logical layers regardless of the multisample layout.
1193 */
1194 const unsigned mt_layer_unit =
1195 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
1196 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
1197 MAX2(irb->mt->num_samples, 1) : 1;
1198
1199 const struct isl_view view = {
1200 .format = format,
1201 .base_level = irb->mt_level - irb->mt->first_level,
1202 .levels = 1,
1203 .base_array_layer = irb->mt_layer / mt_layer_unit,
1204 .array_len = irb->layer_count,
1205 .swizzle = ISL_SWIZZLE_IDENTITY,
1206 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1207 };
1208
1209 const int flags = brw->draw_aux_buffer_disabled[i] ?
1210 INTEL_AUX_BUFFER_DISABLED : 0;
1211 brw_emit_surface_state(brw, irb->mt, flags, target, view,
1212 surface_state_infos[brw->gen].tex_mocs,
1213 surf_offset, surf_index,
1214 I915_GEM_DOMAIN_SAMPLER, 0);
1215
1216 } else {
1217 brw->vtbl.emit_null_surface_state(
1218 brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1219 _mesa_geometric_samples(fb), surf_offset);
1220 }
1221 }
1222
1223 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1224 }
1225 }
1226
1227 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1228 .dirty = {
1229 .mesa = _NEW_BUFFERS,
1230 .brw = BRW_NEW_BATCH |
1231 BRW_NEW_FRAGMENT_PROGRAM |
1232 BRW_NEW_FS_PROG_DATA,
1233 },
1234 .emit = update_renderbuffer_read_surfaces,
1235 };
1236
1237 static void
1238 update_stage_texture_surfaces(struct brw_context *brw,
1239 const struct gl_program *prog,
1240 struct brw_stage_state *stage_state,
1241 bool for_gather, uint32_t plane)
1242 {
1243 if (!prog)
1244 return;
1245
1246 struct gl_context *ctx = &brw->ctx;
1247
1248 uint32_t *surf_offset = stage_state->surf_offset;
1249
1250 /* BRW_NEW_*_PROG_DATA */
1251 if (for_gather)
1252 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1253 else
1254 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1255
1256 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1257 for (unsigned s = 0; s < num_samplers; s++) {
1258 surf_offset[s] = 0;
1259
1260 if (prog->SamplersUsed & (1 << s)) {
1261 const unsigned unit = prog->SamplerUnits[s];
1262
1263 /* _NEW_TEXTURE */
1264 if (ctx->Texture.Unit[unit]._Current) {
1265 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1266 }
1267 }
1268 }
1269 }
1270
1271
1272 /**
1273 * Construct SURFACE_STATE objects for enabled textures.
1274 */
1275 static void
1276 brw_update_texture_surfaces(struct brw_context *brw)
1277 {
1278 /* BRW_NEW_VERTEX_PROGRAM */
1279 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1280
1281 /* BRW_NEW_TESS_PROGRAMS */
1282 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1283 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1284
1285 /* BRW_NEW_GEOMETRY_PROGRAM */
1286 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1287
1288 /* BRW_NEW_FRAGMENT_PROGRAM */
1289 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1290
1291 /* _NEW_TEXTURE */
1292 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1293 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1294 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1295 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1296 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1297
1298    /* Emit an alternate set of surface state for gather. This
1299     * allows the surface format to be overridden for only the
1300     * gather4 messages. */
1301 if (brw->gen < 8) {
1302 if (vs && vs->nir->info->uses_texture_gather)
1303 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1304 if (tcs && tcs->nir->info->uses_texture_gather)
1305 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1306 if (tes && tes->nir->info->uses_texture_gather)
1307 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1308 if (gs && gs->nir->info->uses_texture_gather)
1309 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1310 if (fs && fs->nir->info->uses_texture_gather)
1311 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1312 }
1313
1314 if (fs) {
1315 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1316 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1317 }
1318
1319 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1320 }
1321
1322 const struct brw_tracked_state brw_texture_surfaces = {
1323 .dirty = {
1324 .mesa = _NEW_TEXTURE,
1325 .brw = BRW_NEW_BATCH |
1326 BRW_NEW_BLORP |
1327 BRW_NEW_FRAGMENT_PROGRAM |
1328 BRW_NEW_FS_PROG_DATA |
1329 BRW_NEW_GEOMETRY_PROGRAM |
1330 BRW_NEW_GS_PROG_DATA |
1331 BRW_NEW_TESS_PROGRAMS |
1332 BRW_NEW_TCS_PROG_DATA |
1333 BRW_NEW_TES_PROG_DATA |
1334 BRW_NEW_TEXTURE_BUFFER |
1335 BRW_NEW_VERTEX_PROGRAM |
1336 BRW_NEW_VS_PROG_DATA,
1337 },
1338 .emit = brw_update_texture_surfaces,
1339 };
1340
1341 static void
1342 brw_update_cs_texture_surfaces(struct brw_context *brw)
1343 {
1344 /* BRW_NEW_COMPUTE_PROGRAM */
1345 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1346
1347 /* _NEW_TEXTURE */
1348 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1349
1350    /* Emit an alternate set of surface state for gather. This
1351     * allows the surface format to be overridden for only the
1352     * gather4 messages.
1353 */
1354 if (brw->gen < 8) {
1355 if (cs && cs->nir->info->uses_texture_gather)
1356 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1357 }
1358
1359 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1360 }
1361
1362 const struct brw_tracked_state brw_cs_texture_surfaces = {
1363 .dirty = {
1364 .mesa = _NEW_TEXTURE,
1365 .brw = BRW_NEW_BATCH |
1366 BRW_NEW_BLORP |
1367 BRW_NEW_COMPUTE_PROGRAM,
1368 },
1369 .emit = brw_update_cs_texture_surfaces,
1370 };
1371
1372
1373 void
1374 brw_upload_ubo_surfaces(struct brw_context *brw,
1375 struct gl_linked_shader *shader,
1376 struct brw_stage_state *stage_state,
1377 struct brw_stage_prog_data *prog_data)
1378 {
1379 struct gl_context *ctx = &brw->ctx;
1380
1381 if (!shader)
1382 return;
1383
1384 uint32_t *ubo_surf_offsets =
1385 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1386
1387 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1388 struct gl_uniform_buffer_binding *binding =
1389 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1390
1391 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1392 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1393 } else {
1394 struct intel_buffer_object *intel_bo =
1395 intel_buffer_object(binding->BufferObject);
1396 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1397 if (!binding->AutomaticSize)
1398 size = MIN2(size, binding->Size);
1399 drm_intel_bo *bo =
1400 intel_bufferobj_buffer(brw, intel_bo,
1401 binding->Offset,
1402 size);
1403 brw_create_constant_surface(brw, bo, binding->Offset,
1404 size,
1405 &ubo_surf_offsets[i]);
1406 }
1407 }
1408
1409 uint32_t *ssbo_surf_offsets =
1410 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1411
1412 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1413 struct gl_shader_storage_buffer_binding *binding =
1414 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1415
1416 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1417 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1418 } else {
1419 struct intel_buffer_object *intel_bo =
1420 intel_buffer_object(binding->BufferObject);
1421 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1422 if (!binding->AutomaticSize)
1423 size = MIN2(size, binding->Size);
1424 drm_intel_bo *bo =
1425 intel_bufferobj_buffer(brw, intel_bo,
1426 binding->Offset,
1427 size);
1428 brw_create_buffer_surface(brw, bo, binding->Offset,
1429 size,
1430 &ssbo_surf_offsets[i]);
1431 }
1432 }
1433
1434 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1435 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1436 }
1437
1438 static void
1439 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1440 {
1441 struct gl_context *ctx = &brw->ctx;
1442 /* _NEW_PROGRAM */
1443 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1444
1445 if (!prog)
1446 return;
1447
1448 /* BRW_NEW_FS_PROG_DATA */
1449 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1450 &brw->wm.base, brw->wm.base.prog_data);
1451 }
1452
1453 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1454 .dirty = {
1455 .mesa = _NEW_PROGRAM,
1456 .brw = BRW_NEW_BATCH |
1457 BRW_NEW_BLORP |
1458 BRW_NEW_FS_PROG_DATA |
1459 BRW_NEW_UNIFORM_BUFFER,
1460 },
1461 .emit = brw_upload_wm_ubo_surfaces,
1462 };
1463
1464 static void
1465 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1466 {
1467 struct gl_context *ctx = &brw->ctx;
1468 /* _NEW_PROGRAM */
1469 struct gl_shader_program *prog =
1470 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1471
1472 if (!prog)
1473 return;
1474
1475 /* BRW_NEW_CS_PROG_DATA */
1476 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1477 &brw->cs.base, brw->cs.base.prog_data);
1478 }
1479
1480 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1481 .dirty = {
1482 .mesa = _NEW_PROGRAM,
1483 .brw = BRW_NEW_BATCH |
1484 BRW_NEW_BLORP |
1485 BRW_NEW_CS_PROG_DATA |
1486 BRW_NEW_UNIFORM_BUFFER,
1487 },
1488 .emit = brw_upload_cs_ubo_surfaces,
1489 };
1490
1491 void
1492 brw_upload_abo_surfaces(struct brw_context *brw,
1493 struct gl_linked_shader *shader,
1494 struct brw_stage_state *stage_state,
1495 struct brw_stage_prog_data *prog_data)
1496 {
1497 struct gl_context *ctx = &brw->ctx;
1498 uint32_t *surf_offsets =
1499 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1500
1501 if (shader && shader->NumAtomicBuffers) {
1502 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1503 struct gl_atomic_buffer_binding *binding =
1504 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1505 struct intel_buffer_object *intel_bo =
1506 intel_buffer_object(binding->BufferObject);
1507 drm_intel_bo *bo = intel_bufferobj_buffer(
1508 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1509
1510 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1511 binding->Offset, BRW_SURFACEFORMAT_RAW,
1512 bo->size - binding->Offset, 1, true);
1513 }
1514
1515 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1516 }
1517 }
1518
1519 static void
1520 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1521 {
1522 struct gl_context *ctx = &brw->ctx;
1523 /* _NEW_PROGRAM */
1524 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1525
1526 if (prog) {
1527 /* BRW_NEW_FS_PROG_DATA */
1528 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1529 &brw->wm.base, brw->wm.base.prog_data);
1530 }
1531 }
1532
1533 const struct brw_tracked_state brw_wm_abo_surfaces = {
1534 .dirty = {
1535 .mesa = _NEW_PROGRAM,
1536 .brw = BRW_NEW_ATOMIC_BUFFER |
1537 BRW_NEW_BLORP |
1538 BRW_NEW_BATCH |
1539 BRW_NEW_FS_PROG_DATA,
1540 },
1541 .emit = brw_upload_wm_abo_surfaces,
1542 };
1543
1544 static void
1545 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1546 {
1547 struct gl_context *ctx = &brw->ctx;
1548 /* _NEW_PROGRAM */
1549 struct gl_shader_program *prog =
1550 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1551
1552 if (prog) {
1553 /* BRW_NEW_CS_PROG_DATA */
1554 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1555 &brw->cs.base, brw->cs.base.prog_data);
1556 }
1557 }
1558
1559 const struct brw_tracked_state brw_cs_abo_surfaces = {
1560 .dirty = {
1561 .mesa = _NEW_PROGRAM,
1562 .brw = BRW_NEW_ATOMIC_BUFFER |
1563 BRW_NEW_BLORP |
1564 BRW_NEW_BATCH |
1565 BRW_NEW_CS_PROG_DATA,
1566 },
1567 .emit = brw_upload_cs_abo_surfaces,
1568 };
1569
1570 static void
1571 brw_upload_cs_image_surfaces(struct brw_context *brw)
1572 {
1573 struct gl_context *ctx = &brw->ctx;
1574 /* _NEW_PROGRAM */
1575 struct gl_shader_program *prog =
1576 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1577
1578 if (prog) {
1579 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1580 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1581 &brw->cs.base, brw->cs.base.prog_data);
1582 }
1583 }
1584
1585 const struct brw_tracked_state brw_cs_image_surfaces = {
1586 .dirty = {
1587 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1588 .brw = BRW_NEW_BATCH |
1589 BRW_NEW_BLORP |
1590 BRW_NEW_CS_PROG_DATA |
1591 BRW_NEW_IMAGE_UNITS
1592 },
1593 .emit = brw_upload_cs_image_surfaces,
1594 };
1595
1596 static uint32_t
1597 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1598 {
1599 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1600 uint32_t hw_format = brw_format_for_mesa_format(format);
1601 if (access == GL_WRITE_ONLY) {
1602 return hw_format;
1603 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1604 /* Typed surface reads support a very limited subset of the shader
1605 * image formats. Translate it into the closest format the
1606 * hardware supports.
1607 */
1608 return isl_lower_storage_image_format(devinfo, hw_format);
1609 } else {
1610 /* The hardware doesn't actually support a typed format that we can use
1611 * so we have to fall back to untyped read/write messages.
1612 */
1613 return BRW_SURFACEFORMAT_RAW;
1614 }
1615 }
1616
1617 static void
1618 update_default_image_param(struct brw_context *brw,
1619 struct gl_image_unit *u,
1620 unsigned surface_idx,
1621 struct brw_image_param *param)
1622 {
1623 memset(param, 0, sizeof(*param));
1624 param->surface_idx = surface_idx;
1625 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1626 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1627 * detailed explanation of these parameters.
1628 */
1629 param->swizzling[0] = 0xff;
1630 param->swizzling[1] = 0xff;
1631 }
1632
1633 static void
1634 update_buffer_image_param(struct brw_context *brw,
1635 struct gl_image_unit *u,
1636 unsigned surface_idx,
1637 struct brw_image_param *param)
1638 {
1639 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1640 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1641 update_default_image_param(brw, u, surface_idx, param);
1642
1643 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1644 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1645 }
1646
1647 static void
1648 update_texture_image_param(struct brw_context *brw,
1649 struct gl_image_unit *u,
1650 unsigned surface_idx,
1651 struct brw_image_param *param)
1652 {
1653 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1654
1655 update_default_image_param(brw, u, surface_idx, param);
1656
1657 param->size[0] = minify(mt->logical_width0, u->Level);
1658 param->size[1] = minify(mt->logical_height0, u->Level);
1659 param->size[2] = (!u->Layered ? 1 :
1660 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1661 u->TexObj->Target == GL_TEXTURE_3D ?
1662 minify(mt->logical_depth0, u->Level) :
1663 mt->logical_depth0);
1664
1665 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1666 &param->offset[0],
1667 &param->offset[1]);
1668
1669    param->stride[0] = mt->cpp;             /* bytes per texel */
1670    param->stride[1] = mt->pitch / mt->cpp; /* row pitch in texels */
1671 param->stride[2] =
1672 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1673 param->stride[3] =
1674 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1675
1676 if (mt->tiling == I915_TILING_X) {
1677 /* An X tile is a rectangular block of 512x8 bytes. */
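      /* For a 4-byte-per-pixel format this gives tiling[0] = log2(512 / 4) = 7
       * and tiling[1] = log2(8) = 3, i.e. a 128x8 pixel tile.
       */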
1678 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1679 param->tiling[1] = _mesa_logbase2(8);
1680
1681 if (brw->has_swizzling) {
1682 /* Right shifts required to swizzle bits 9 and 10 of the memory
1683 * address with bit 6.
1684 */
1685 param->swizzling[0] = 3;
1686 param->swizzling[1] = 4;
1687 }
1688 } else if (mt->tiling == I915_TILING_Y) {
1689    } else if (mt->tiling == I915_TILING_Y) {
1690       /* The layout of a Y-tiled surface in memory isn't really fundamentally
1691        * different from the layout of an X-tiled surface; we simply pretend that
1692        * the surface is broken up into a number of smaller 16Bx32 tiles, each
1693        * one arranged in X-major order just as is the case for X-tiling.
1693 */
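      /* For a 4-byte-per-pixel format this gives a 4x32 pixel pretend tile:
       * tiling[0] = log2(16 / 4) = 2 and tiling[1] = log2(32) = 5.
       */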
1694 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1695 param->tiling[1] = _mesa_logbase2(32);
1696
1697 if (brw->has_swizzling) {
1698 /* Right shift required to swizzle bit 9 of the memory address with
1699 * bit 6.
1700 */
1701 param->swizzling[0] = 3;
1702 }
1703 }
1704
1705 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1706 * address calculation algorithm (emit_address_calculation() in
1707 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1708 * modulus equal to the LOD.
1709 */
1710 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1711 0);
1712 }
1713
1714 static void
1715 update_image_surface(struct brw_context *brw,
1716 struct gl_image_unit *u,
1717 GLenum access,
1718 unsigned surface_idx,
1719 uint32_t *surf_offset,
1720 struct brw_image_param *param)
1721 {
1722 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1723 struct gl_texture_object *obj = u->TexObj;
1724 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1725
1726 if (obj->Target == GL_TEXTURE_BUFFER) {
1727 struct intel_buffer_object *intel_obj =
1728 intel_buffer_object(obj->BufferObject);
1729 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1730 _mesa_get_format_bytes(u->_ActualFormat));
1731
1732 brw_emit_buffer_surface_state(
1733 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1734 format, intel_obj->Base.Size, texel_size,
1735 access != GL_READ_ONLY);
1736
1737 update_buffer_image_param(brw, u, surface_idx, param);
1738
1739 } else {
1740 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1741 struct intel_mipmap_tree *mt = intel_obj->mt;
1742
1743 if (format == BRW_SURFACEFORMAT_RAW) {
1744 brw_emit_buffer_surface_state(
1745 brw, surf_offset, mt->bo, mt->offset,
1746 format, mt->bo->size - mt->offset, 1 /* pitch */,
1747 access != GL_READ_ONLY);
1748
1749 } else {
1750 const unsigned num_layers = (!u->Layered ? 1 :
1751 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1752 mt->logical_depth0);
1753
1754 struct isl_view view = {
1755 .format = format,
1756 .base_level = obj->MinLevel + u->Level,
1757 .levels = 1,
1758 .base_array_layer = obj->MinLayer + u->_Layer,
1759 .array_len = num_layers,
1760 .swizzle = ISL_SWIZZLE_IDENTITY,
1761 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1762 };
1763
1764 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1765 const int flags =
1766 mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED ?
1767 INTEL_AUX_BUFFER_DISABLED : 0;
1768 brw_emit_surface_state(brw, mt, flags, mt->target, view,
1769 surface_state_infos[brw->gen].tex_mocs,
1770 surf_offset, surf_index,
1771 I915_GEM_DOMAIN_SAMPLER,
1772 access == GL_READ_ONLY ? 0 :
1773 I915_GEM_DOMAIN_SAMPLER);
1774 }
1775
1776 update_texture_image_param(brw, u, surface_idx, param);
1777 }
1778
1779 } else {
1780 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1781 update_default_image_param(brw, u, surface_idx, param);
1782 }
1783 }
1784
1785 void
1786 brw_upload_image_surfaces(struct brw_context *brw,
1787 struct gl_linked_shader *shader,
1788 struct brw_stage_state *stage_state,
1789 struct brw_stage_prog_data *prog_data)
1790 {
1791 struct gl_context *ctx = &brw->ctx;
1792
1793 if (shader && shader->NumImages) {
1794 for (unsigned i = 0; i < shader->NumImages; i++) {
1795 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1796 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1797
1798 update_image_surface(brw, u, shader->ImageAccess[i],
1799 surf_idx,
1800 &stage_state->surf_offset[surf_idx],
1801 &prog_data->image_param[i]);
1802 }
1803
1804 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1805 /* This may have changed the image metadata dependent on the context
1806        * image unit state and passed to the program as uniforms; make sure
1807 * that push and pull constants are reuploaded.
1808 */
1809 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1810 }
1811 }
1812
1813 static void
1814 brw_upload_wm_image_surfaces(struct brw_context *brw)
1815 {
1816 struct gl_context *ctx = &brw->ctx;
1817 /* BRW_NEW_FRAGMENT_PROGRAM */
1818 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1819
1820 if (prog) {
1821 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1822 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1823 &brw->wm.base, brw->wm.base.prog_data);
1824 }
1825 }
1826
1827 const struct brw_tracked_state brw_wm_image_surfaces = {
1828 .dirty = {
1829 .mesa = _NEW_TEXTURE,
1830 .brw = BRW_NEW_BATCH |
1831 BRW_NEW_BLORP |
1832 BRW_NEW_FRAGMENT_PROGRAM |
1833 BRW_NEW_FS_PROG_DATA |
1834 BRW_NEW_IMAGE_UNITS
1835 },
1836 .emit = brw_upload_wm_image_surfaces,
1837 };
1838
1839 void
1840 gen4_init_vtable_surface_functions(struct brw_context *brw)
1841 {
1842 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1843 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1844 }
1845
1846 void
1847 gen6_init_vtable_surface_functions(struct brw_context *brw)
1848 {
1849 gen4_init_vtable_surface_functions(brw);
1850 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1851 }
1852
1853 static void
1854 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1855 {
1856 struct gl_context *ctx = &brw->ctx;
1857 /* _NEW_PROGRAM */
1858 struct gl_shader_program *prog =
1859 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1860 /* BRW_NEW_CS_PROG_DATA */
1861 const struct brw_cs_prog_data *cs_prog_data =
1862 brw_cs_prog_data(brw->cs.base.prog_data);
1863
1864 if (prog && cs_prog_data->uses_num_work_groups) {
1865 const unsigned surf_idx =
1866 cs_prog_data->binding_table.work_groups_start;
1867 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1868 drm_intel_bo *bo;
1869 uint32_t bo_offset;
1870
1871 if (brw->compute.num_work_groups_bo == NULL) {
1872 bo = NULL;
1873 intel_upload_data(brw,
1874 (void *)brw->compute.num_work_groups,
1875 3 * sizeof(GLuint),
1876 sizeof(GLuint),
1877 &bo,
1878 &bo_offset);
1879 } else {
1880 bo = brw->compute.num_work_groups_bo;
1881 bo_offset = brw->compute.num_work_groups_offset;
1882 }
1883
1884 brw_emit_buffer_surface_state(brw, surf_offset,
1885 bo, bo_offset,
1886 BRW_SURFACEFORMAT_RAW,
1887 3 * sizeof(GLuint), 1, true);
1888 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1889 }
1890 }
1891
1892 const struct brw_tracked_state brw_cs_work_groups_surface = {
1893 .dirty = {
1894 .brw = BRW_NEW_BLORP |
1895 BRW_NEW_CS_PROG_DATA |
1896 BRW_NEW_CS_WORK_GROUPS
1897 },
1898 .emit = brw_upload_cs_work_groups_surface,
1899 };